1 /* message.c -- Message manipulation/parsing
2  *
3  * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #include <config.h>
44 
45 #ifdef HAVE_UNISTD_H
46 #include <unistd.h>
47 #endif
48 #include <errno.h>
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <string.h>
52 #include <sysexits.h>
53 #include <syslog.h>
54 #include <sys/types.h>
55 #include <sys/uio.h>
56 #include <sys/stat.h>
57 #include <netinet/in.h>
58 #include <stdlib.h>
59 
60 #include "arrayu64.h"
61 #include "assert.h"
62 #include "crc32.h"
63 #include "dlist.h"
64 #include "prot.h"
65 #include "hash.h"
66 #include "map.h"
67 #include "mailbox.h"
68 #include "message.h"
69 #include "message_priv.h"
70 #include "message_guid.h"
71 #include "parseaddr.h"
72 #include "charset.h"
73 #include "stristr.h"
74 #include "user.h"
75 #include "util.h"
76 #include "xmalloc.h"
77 #include "xstrlcpy.h"
78 #include "strarray.h"
79 #include "ptrarray.h"
80 #include "global.h"
81 #include "retry.h"
82 #include "rfc822tok.h"
83 #include "times.h"
84 #include "xstrnchr.h"
85 
86 /* generated headers are not necessarily in current directory */
87 #include "imap/imap_err.h"
88 #include "imap/rfc822_header.h"
89 
90 static int message_map_file(message_t *m, const char *fname);
91 static int message_parse_cbodystructure(message_t *m);
92 
93 #define DEBUG 0
94 
/*
 * Message being parsed: the raw message text plus the parser's current
 * position within it.
 */
struct msg {
    const char *base;           /* start of the raw message text */
    unsigned long len;          /* length in bytes of the text at 'base' */
    unsigned long offset;       /* current parse position, counted from 'base';
                                 * the parsers start it at 0 */
    int encode;                 /* set to 1 by message_parse_binary_file(),
                                 * which parses a private writable copy of the
                                 * file, and to 0 by message_parse_mapped() */
};
102 
103 #define MAX_FIELDNAME_LENGTH   256
104 
105 /* Default MIME Content-type */
106 #define DEFAULT_CONTENT_TYPE "TEXT/PLAIN; CHARSET=us-ascii"
107 
108 static int message_parse_body(struct msg *msg,
109                               struct body *body,
110                               const char *defaultContentType,
111                               strarray_t *boundaries,
112                               const char *efname);
113 static int message_parse_headers(struct msg *msg,
114                                  struct body *body,
115                                  const char *defaultContentType,
116                                  strarray_t *boundaries,
117                                  const char *efname);
118 
119 static void message_parse_address(const char *hdr, struct address **addrp);
120 static void message_parse_encoding(const char *hdr, char **hdrp);
121 static void message_parse_charset(const struct body *body,
122                                   int *encoding, charset_t *charset);
123 static void message_parse_header(const char *hdr, struct buf *buf);
124 static void message_parse_bodytype(const char *hdr, struct body *body);
125 static void message_parse_bodydisposition(const char *hdr, struct body *body);
126 static void message_parse_params(const char *hdr, struct param **paramp);
127 static void message_fold_params(struct param **paramp);
128 static void message_parse_language(const char *hdr, struct param **paramp);
129 static void message_parse_rfc822space(const char **s);
130 static void message_parse_received_date(const char *hdr, char **hdrp);
131 
132 static void message_parse_multipart(struct msg *msg,
133                                     struct body *body,
134                                     strarray_t *boundaries,
135                                     const char *efname);
136 static void message_parse_content(struct msg *msg,
137                                   struct body *body,
138                                   strarray_t *boundaries,
139                                   const char *efname);
140 
141 static char *message_getline(struct buf *, struct msg *msg);
142 static int message_pendingboundary(const char *s, int slen, strarray_t *);
143 
144 static void message_write_envelope(struct buf *buf, const struct body *body);
145 static void message_write_address(struct buf *buf,
146                                   const struct address *addrlist);
147 static void message_write_text_lcase(struct buf *buf, const char *s);
148 static void message_write_section(struct buf *buf, const struct body *body);
149 static void message_write_charset(struct buf *buf, const struct body *body);
150 static void message_write_searchaddr(struct buf *buf,
151                                      const struct address *addrlist);
152 static int message_need(const message_t *m, unsigned int need);
153 static void message_yield(message_t *m, unsigned int yield);
154 
/*
 * Convert the NUL-terminated string 's' to uppercase in place.
 * Returns 's'.
 *
 * This differs from the ucase() function in lib/util.c by using the
 * libc toupper() instead of our hardcoded builtin lookup table.
 * Whether this is a good thing is unclear, but that's what the old code
 * did so I'm going to preserve it - gnb
 *
 * Every byte is converted to unsigned char before it reaches the
 * <ctype.h> functions: handing them a negative plain char is undefined
 * behaviour.
 */
static char *message_ucase(char *s)
{
    char *p;

    for (p = s ; *p ; p++) {
        unsigned char c = (unsigned char) *p;

        if (islower(c))
            *p = (char) toupper(c);
    }
    return s;
}
172 
173 /*
174  * Check a message 'from' of 'size' bytes for minimal RFC 822 compliance.
175  * The message is read from 'from'. If 'to' is not NULL, the message
176  * is copied to 'to', otherwise an in-memory buffer of 'from' is checked.
177  *
178  * Caller must have initialized config_* routines (with cyrus_init) to read
179  * imapd.conf before calling.
180  */
message_copy_strict(struct protstream * from,FILE * to,unsigned size,int allow_null)181 EXPORTED int message_copy_strict(struct protstream *from, FILE *to,
182                                  unsigned size, int allow_null)
183 {
184     char buf[4096+1];
185     unsigned char *p, *endp;
186     int r = 0;
187     size_t n;
188     int sawcr = 0, sawnl;
189     int reject8bit = config_getswitch(IMAPOPT_REJECT8BIT);
190     int munge8bit = config_getswitch(IMAPOPT_MUNGE8BIT);
191     int inheader = 1, blankline = 1;
192     struct buf tmp = BUF_INITIALIZER;
193 
194     while (size) {
195         n = prot_read(from, buf, size > 4096 ? 4096 : size);
196         if (!n) {
197             syslog(LOG_ERR, "IOERROR: reading message: unexpected end of file");
198             return IMAP_IOERROR;
199         }
200 
201         buf[n] = '\0';
202 
203         /* Quick check for NUL in entire buffer, if we're not allowing it */
204         if (!allow_null && (n != strlen(buf))) {
205             r = IMAP_MESSAGE_CONTAINSNULL;
206         }
207 
208         size -= n;
209         if (r) continue;
210 
211         for (p = (unsigned char *)buf, endp = p + n; p < endp; p++) {
212             if (!*p && inheader) {
213                 /* NUL in header is always bad */
214                 r = IMAP_MESSAGE_CONTAINSNULL;
215             }
216             else if (*p == '\n') {
217                 if (!sawcr && (inheader || !allow_null))
218                     r = IMAP_MESSAGE_CONTAINSNL;
219                 sawcr = 0;
220                 if (blankline) {
221                     inheader = 0;
222                 }
223                 blankline = 1;
224             }
225             else if (*p == '\r') {
226                 sawcr = 1;
227             }
228             else {
229                 sawcr = 0;
230                 blankline = 0;
231                 if (inheader && *p >= 0x80) {
232                     if (reject8bit) {
233                         /* We have been configured to reject all mail of this
234                            form. */
235                         if (!r) r = IMAP_MESSAGE_CONTAINS8BIT;
236                     } else if (munge8bit) {
237                         /* We have been configured to munge all mail of this
238                            form. */
239                         *p = 'X';
240                     }
241                 }
242             }
243         }
244 
245         if (to)
246             fwrite(buf, 1, n, to);
247         else
248             buf_appendmap(&tmp, buf, n);
249     }
250 
251     if (r) goto done;
252 
253     if (to) {
254         fflush(to);
255         if (ferror(to) || fsync(fileno(to))) {
256             syslog(LOG_ERR, "IOERROR: writing message: %m");
257             r = IMAP_IOERROR;
258             goto done;
259         }
260         rewind(to);
261     }
262 
263     /* Go back and check headers */
264     sawnl = 1;
265     const char *cur = buf_base(&tmp);
266     const char *top = buf_base(&tmp) + buf_len(&tmp);
267     for (;;) {
268         /* Read headers into buffer */
269         if (to) {
270             if (!fgets(buf, sizeof(buf), to)) {
271                 r = sawnl ? 0 : IMAP_MESSAGE_BADHEADER;
272                 goto done;
273             }
274         }
275         else {
276             if (cur >= top) {
277                 r = sawnl ? 0 : IMAP_MESSAGE_BADHEADER;
278                 goto done;
279             }
280             const char *q = strchr(cur, '\n');
281             if (q == NULL) {
282                 q = cur + sizeof(buf);
283                 if (q > top) q = top;
284             }
285             else {
286                 q++;
287             }
288             if (q > cur + sizeof(buf) - 1) {
289                 q = cur + sizeof(buf) - 1;
290             }
291             memcpy(buf, cur, q - cur);
292             buf[q-cur] = '\0';
293             cur = q;
294         }
295 
296         /* End of header section */
297         if (sawnl && buf[0] == '\r') {
298             r = 0;
299             goto done;
300         }
301 
302         /* Check for valid header name */
303         if (sawnl && buf[0] != ' ' && buf[0] != '\t') {
304             if (buf[0] == ':') {
305                 r = IMAP_MESSAGE_BADHEADER;
306                 goto done;
307             }
308             if (strstr(buf, "From ") != buf) {
309                 for (p = (unsigned char *)buf; *p && *p != ':'; p++) {
310                     if (*p <= ' ') {
311                         r = IMAP_MESSAGE_BADHEADER;
312                         goto done;
313                     }
314                 }
315             }
316         }
317 
318         /* Used to be some 8bit checks here but those were moved above so that
319            we could do something other than refuse the message.
320            Unfortunately, we still need to look for the end of the string. */
321         for(p = (unsigned char*) buf; *p; p++);
322 
323         sawnl = (p > (unsigned char *)buf) && (p[-1] == '\n');
324     }
325 done:
326     buf_free(&tmp);
327     return r;
328 }
329 
message_parse(const char * fname,struct index_record * record)330 EXPORTED int message_parse(const char *fname, struct index_record *record)
331 {
332     struct body *body = NULL;
333     FILE *f;
334     int r;
335 
336     f = fopen(fname, "r");
337     if (!f) return IMAP_IOERROR;
338 
339     r = message_parse_file(f, NULL, NULL, &body, fname);
340     if (!r) r = message_create_record(record, body);
341 
342     fclose(f);
343 
344     if (body) {
345         message_free_body(body);
346         free(body);
347     }
348 
349     return r;
350 }
351 
352 /*
353  * Parse the message 'infile'.
354  *
355  * The caller MUST free the allocated body struct.
356  *
357  * If msg_base/msg_len are non-NULL, the file will remain memory-mapped
358  * and returned to the caller.  The caller MUST unmap the file.
359  */
message_parse_file(FILE * infile,const char ** msg_base,size_t * msg_len,struct body ** body,const char * efname)360 EXPORTED int message_parse_file(FILE *infile,
361                                 const char **msg_base, size_t *msg_len,
362                                 struct body **body,
363                                 const char *efname)
364 {
365     int fd = fileno(infile);
366     struct stat sbuf;
367     const char *tmp_base;
368     size_t tmp_len;
369     int unmap = 0, r;
370 
371     if (!msg_base) {
372         unmap = 1;
373         msg_base = &tmp_base;
374         msg_len = &tmp_len;
375     }
376     *msg_base = NULL;
377     *msg_len = 0;
378 
379     if (fstat(fd, &sbuf) == -1) {
380         if (efname)
381             syslog(LOG_ERR, "IOERROR: fstat on new message in spool (%s): %m",
382                    efname);
383         else
384             syslog(LOG_ERR, "IOERROR: fstat on new message in spool: %m");
385         fatal("can't fstat message file", EX_OSFILE);
386     }
387     map_refresh(fd, 1, msg_base, msg_len, sbuf.st_size,
388                 "new message", 0);
389 
390     if (!*msg_base || !*msg_len)
391         return IMAP_IOERROR; /* zero length file? */
392 
393     if (!*body) *body = (struct body *) xzmalloc(sizeof(struct body));
394     r = message_parse_mapped(*msg_base, *msg_len, *body, efname);
395 
396     if (unmap) map_free(msg_base, msg_len);
397 
398     return r;
399 }
400 
401 /*
402  * Parse the message 'infile'.
403  *
404  * The caller MUST free the allocated body struct.
405  *
406  * If msg_base/msg_len are non-NULL, the file will remain memory-mapped
407  * and returned to the caller.  The caller MUST unmap the file.
408  */
message_parse_file_buf(FILE * infile,struct buf * buf,struct body ** body,const char * efname)409 EXPORTED int message_parse_file_buf(FILE *infile,
410                                     struct buf *buf,
411                                     struct body **body,
412                                     const char *efname)
413 {
414     int fd = fileno(infile);
415     struct stat sbuf;
416 
417     // unmap or clear space
418     buf_free(buf);
419 
420     if (fstat(fd, &sbuf) == -1) {
421         if (efname)
422             syslog(LOG_ERR, "IOERROR: fstat on new message in spool (%s): %m",
423                    efname);
424         else
425             syslog(LOG_ERR, "IOERROR: fstat on new message in spool: %m");
426         fatal("can't fstat message file", EX_OSFILE);
427     }
428     buf_refresh_mmap(buf, 1, fd, efname, sbuf.st_size, "new message");
429 
430     if (!*body) *body = (struct body *) xzmalloc(sizeof(struct body));
431     return message_parse_mapped(buf_base(buf), buf_len(buf), *body, efname);
432 }
433 
434 
435 /*
436  * Parse the message 'infile'.
437  *
438  * The caller MUST free the allocated body struct.
439  *
440  * This function differs from message_parse_file() in that we create a
441  * writable buffer rather than memory-mapping the file, so that binary
442  * data can be encoded into the buffer.  The file is rewritten upon
443  * completion.
444  *
445  * XXX can we do this with mmap()?
446  */
message_parse_binary_file(FILE * infile,struct body ** body,const char * efname)447 EXPORTED int message_parse_binary_file(FILE *infile, struct body **body,
448                                        const char *efname)
449 {
450     int fd = fileno(infile);
451     struct stat sbuf;
452     struct msg msg;
453     size_t n;
454 
455     if (fstat(fd, &sbuf) == -1) {
456         if (efname)
457             syslog(LOG_ERR, "IOERROR: fstat on new message in spool (%s): %m",
458                    efname);
459         else
460             syslog(LOG_ERR, "IOERROR: fstat on new message in spool: %m");
461         fatal("can't fstat message file", EX_OSFILE);
462     }
463     msg.len = sbuf.st_size;
464     msg.base = xmalloc(msg.len);
465     msg.offset = 0;
466     msg.encode = 1;
467 
468     lseek(fd, 0L, SEEK_SET);
469 
470     n = retry_read(fd, (char*) msg.base, msg.len);
471     if (n != msg.len) {
472         if (efname)
473             syslog(LOG_ERR, "IOERROR: reading binary file in spool (%s): %m",
474                    efname);
475         else
476             syslog(LOG_ERR, "IOERROR: reading binary file in spool: %m");
477         return IMAP_IOERROR;
478     }
479 
480     if (!*body) *body = (struct body *) xzmalloc(sizeof(struct body));
481     message_parse_body(&msg, *body,
482                        DEFAULT_CONTENT_TYPE, NULL, efname);
483 
484     (*body)->filesize = msg.len;
485 
486     message_guid_generate(&(*body)->guid, msg.base, msg.len);
487 
488     lseek(fd, 0L, SEEK_SET);
489     n = retry_write(fd, msg.base, msg.len);
490 
491     free((char*) msg.base);
492 
493     if (n != msg.len || fsync(fd)) {
494         if (efname)
495             syslog(LOG_ERR, "IOERROR: rewriting binary file in spool (%s): %m",
496                    efname);
497         else
498             syslog(LOG_ERR, "IOERROR: rewriting binary file in spool: %m");
499         return IMAP_IOERROR;
500     }
501 
502     return 0;
503 }
504 
505 /*
506  * Parse the message at 'msg_base' of length 'msg_len'.
507  */
message_parse_mapped(const char * msg_base,unsigned long msg_len,struct body * body,const char * efname)508 EXPORTED int message_parse_mapped(const char *msg_base, unsigned long msg_len,
509                                   struct body *body, const char *efname)
510 {
511     struct msg msg;
512 
513     msg.base = msg_base;
514     msg.len = msg_len;
515     msg.offset = 0;
516     msg.encode = 0;
517 
518     message_parse_body(&msg, body, DEFAULT_CONTENT_TYPE, NULL, efname);
519 
520     body->filesize = msg_len;
521 
522     message_guid_generate(&body->guid, msg_base, msg_len);
523 
524     if (body->filesize != body->header_size + body->content_size) {
525         if (efname)
526             syslog(LOG_NOTICE, "IOERROR: size mismatch on parse %s (%s) (%d, %d)",
527                    message_guid_encode(&body->guid), efname,
528                    (int)body->filesize,
529                    (int)(body->header_size + body->content_size));
530         else
531             syslog(LOG_NOTICE, "IOERROR: size mismatch on parse %s (%d, %d)",
532                    message_guid_encode(&body->guid), (int)body->filesize,
533                    (int)(body->header_size + body->content_size));
534     }
535 
536     return 0;
537 }
538 
539 /*
540  * Prune the header section in buf to include only those headers
541  * listed in headers or (if headers_not is non-empty) those headers
542  * not in headers_not.
543  */
message_pruneheader(char * buf,const strarray_t * headers,const strarray_t * headers_not)544 HIDDEN void message_pruneheader(char *buf, const strarray_t *headers,
545                          const strarray_t *headers_not)
546 {
547     char *p, *colon, *nextheader;
548     int goodheader;
549     char *endlastgood = buf;
550     char **l;
551     int count = 0;
552     int maxlines = config_getint(IMAPOPT_MAXHEADERLINES);
553 
554     p = buf;
555     while (*p && *p != '\r') {
556         colon = strchr(p, ':');
557         if (colon && headers_not && headers_not->count) {
558             goodheader = 1;
559             for (l = headers_not->data ; *l ; l++) {
560                 if ((size_t) (colon - p) == strlen(*l) &&
561                     !strncasecmp(p, *l, colon - p)) {
562                     goodheader = 0;
563                     break;
564                 }
565             }
566         } else {
567             goodheader = 0;
568         }
569         if (colon && headers && headers->count) {
570             for (l = headers->data ; *l ; l++) {
571                 if ((size_t) (colon - p) == strlen(*l) &&
572                     !strncasecmp(p, *l, colon - p)) {
573                     goodheader = 1;
574                     break;
575                 }
576             }
577         }
578 
579         nextheader = p;
580         do {
581             nextheader = strchr(nextheader, '\n');
582             if (nextheader) nextheader++;
583             else nextheader = p + strlen(p);
584         } while (*nextheader == ' ' || *nextheader == '\t');
585 
586         if (goodheader) {
587             if (endlastgood != p) {
588                 /* memmove and not strcpy since this is all within a
589                  * single buffer */
590                 memmove(endlastgood, p, strlen(p) + 1);
591                 nextheader -= p - endlastgood;
592             }
593             endlastgood = nextheader;
594         }
595         p = nextheader;
596 
597         /* stop giant headers causing massive loops */
598         if (maxlines) {
599             count++;
600             if (count > maxlines) break;
601         }
602     }
603 
604     *endlastgood = '\0';
605 }
606 
message_find_part(struct body * body,const char * section,const char ** content_types,const char * msg_base,unsigned long msg_len,struct bodypart *** parts,int * n)607 static void message_find_part(struct body *body, const char *section,
608                               const char **content_types,
609                               const char *msg_base, unsigned long msg_len,
610                               struct bodypart ***parts, int *n)
611 {
612     int match;
613     const char **type;
614     char nextsection[128];
615 
616     for (match = 0, type = content_types; !match && *type; type++) {
617         const char *subtype = strchr(*type, '/');
618         size_t tlen = subtype ? (size_t) (subtype++ - *type) : strlen(*type);
619 
620         if ((!(*type)[0] || (tlen == strlen(body->type) &&
621                              !strncasecmp(body->type, *type, tlen))) &&
622             (!subtype || !subtype[0] || !strcasecmp(body->subtype, subtype))) {
623             match = 1;
624         }
625     }
626 
627     if (match) {
628         /* matching part, sanity check the size against the mmap'd file */
629         if (body->content_offset + body->content_size > msg_len) {
630             syslog(LOG_ERR, "IOERROR: body part exceeds size of message file");
631             fatal("body part exceeds size of message file", EX_OSFILE);
632         }
633 
634         if (!body->decoded_body) {
635             int encoding;
636             charset_t charset = CHARSET_UNKNOWN_CHARSET;
637             message_parse_charset(body, &encoding, &charset);
638             if (charset == CHARSET_UNKNOWN_CHARSET)
639                 /* try ASCII */
640                 charset = charset_lookupname("us-ascii");
641             body->decoded_body = charset_to_utf8(
642                 msg_base + body->content_offset, body->content_size,
643                 charset, encoding); /* returns a cstring */
644             charset_free(&charset);
645         }
646 
647         /* grow the array and add the new part */
648         *parts = xrealloc(*parts, (*n+2)*sizeof(struct bodypart *));
649         (*parts)[*n] = xzmalloc(sizeof(struct bodypart));
650         strlcpy((*parts)[*n]->section, section, sizeof((*parts)[*n]->section));
651         (*parts)[*n]->decoded_body = body->decoded_body;
652         (*parts)[++(*n)] = NULL;
653     }
654     else if (!strcmp(body->type, "MULTIPART")) {
655         int i;
656 
657         for (i = 0; i < body->numparts; i++) {
658             snprintf(nextsection, sizeof(nextsection), "%s.%d", section, i+1);
659             message_find_part(&body->subpart[i], nextsection, content_types,
660                               msg_base, msg_len, parts, n);
661         }
662     }
663     else if (!strcmp(body->type, "MESSAGE") &&
664              !strcmp(body->subtype, "RFC822")) {
665         snprintf(nextsection, sizeof(nextsection), "%s.1", section);
666         message_find_part(body->subpart, nextsection, content_types,
667                           msg_base, msg_len, parts, n);
668     }
669 }
670 
671 /*
672  * Fetch the bodypart(s) which match the given content_type and return
673  * them as an allocated array.
674  *
675  * The caller MUST free the array of allocated bodypart(s).
676  */
message_fetch_part(struct message_content * msg,const char ** content_types,struct bodypart *** parts)677 EXPORTED void message_fetch_part(struct message_content *msg,
678                                  const char **content_types,
679                                  struct bodypart ***parts)
680 {
681     int n = 0;  /* running count of the number of matching parts */
682 
683     *parts = NULL;
684     message_find_part(msg->body, "1", content_types,
685                       buf_base(&msg->map), buf_len(&msg->map), parts, &n);
686 }
687 
688 /*
689  * Appends the message's cache information to the cache file
690  * and fills in appropriate information in the index record pointed to
691  * by 'record'.
692  */
message_create_record(struct index_record * record,const struct body * body)693 HIDDEN int message_create_record(struct index_record *record,
694                           const struct body *body)
695 {
696     /* used for sent time searching, truncated to day with no TZ */
697     if (time_from_rfc5322(body->date, &record->sentdate, DATETIME_DATE_ONLY) < 0)
698         record->sentdate = 0;
699 
700     /* used for sent time sorting, full gmtime of Date: header */
701     if (time_from_rfc5322(body->date, &record->gmtime, DATETIME_FULL) < 0)
702         record->gmtime = 0;
703 
704     record->size = body->filesize;
705     record->header_size = body->header_size;
706     message_guid_copy(&record->guid, &body->guid);
707 
708     message_write_cache(record, body);
709 
710     return 0;
711 }
712 
713 static enum rfc822_header
message_header_lookup(const char * buf,const char ** valp)714 message_header_lookup(const char *buf, const char **valp)
715 {
716     unsigned int len = strcspn(buf, ":\r\n");
717     if (buf[len] != ':')
718         return RFC822_BAD;
719     if (valp)
720         *valp = buf+len+1;
721     return rfc822_header_from_string_len(buf, len);
722 }
723 
724 
body_add_content_guid(const char * base,struct body * body)725 static void body_add_content_guid(const char *base, struct body *body)
726 {
727     int encoding = ENCODING_NONE;
728     char *decbuf = NULL;
729     charset_t cs = NULL;
730     size_t len = body->content_size;
731     message_parse_charset(body, &encoding, &cs);
732     base = charset_decode_mimebody(base, len, encoding, &decbuf, &len);
733     if (base) {
734         message_guid_generate(&body->content_guid, base, len);
735         body->decoded_content_size = len;
736     }
737     else {
738         message_guid_set_null(&body->content_guid);
739         body->decoded_content_size = 0;
740     }
741     charset_free(&cs);
742     free(decbuf);
743 }
744 
745 
746 /*
747  * Parse a body-part
748  */
message_parse_body(struct msg * msg,struct body * body,const char * defaultContentType,strarray_t * boundaries,const char * efname)749 static int message_parse_body(struct msg *msg, struct body *body,
750                               const char *defaultContentType,
751                               strarray_t *boundaries,
752                               const char *efname)
753 {
754     strarray_t newboundaries = STRARRAY_INITIALIZER;
755     int sawboundary;
756 
757     memset(body, 0, sizeof(struct body));
758 
759     /* No passed-in boundary structure, create a new, empty one */
760     if (!boundaries) {
761         boundaries = &newboundaries;
762         /* We're at top-level--preallocate space to store cached headers */
763         buf_ensure(&body->cacheheaders, 1024);
764     }
765 
766 
767     sawboundary = message_parse_headers(msg, body, defaultContentType,
768                                         boundaries, efname);
769 
770     /* Charset id and encoding id are stored in the binary
771      * bodystructure, but we don't have that one here. */
772     struct param *param = body->params;
773     while (param) {
774         if (!strcasecmp(param->attribute, "CHARSET")) {
775             body->charset_id = xstrdupnull(param->value);
776             break;
777         }
778         param = param->next;
779     }
780 
781     body->charset_enc = encoding_lookupname(body->encoding);
782 
783     /* Recurse according to type */
784     if (strcmp(body->type, "MULTIPART") == 0) {
785         if (!sawboundary) {
786             message_parse_multipart(msg, body, boundaries, efname);
787         }
788     }
789     else if (strcmp(body->type, "MESSAGE") == 0 &&
790         strcmp(body->subtype, "RFC822") == 0) {
791         const char *base = msg->base + msg->offset;
792         body->subpart = (struct body *)xzmalloc(sizeof(struct body));
793 
794         if (sawboundary) {
795             memset(body->subpart, 0, sizeof(struct body));
796             message_parse_bodytype(DEFAULT_CONTENT_TYPE, body->subpart);
797         }
798         else {
799             message_parse_body(msg, body->subpart,
800                                DEFAULT_CONTENT_TYPE, boundaries, efname);
801 
802             /* Calculate our size/lines information */
803             body->content_size = body->subpart->header_size +
804               body->subpart->content_size;
805             body->content_lines = body->subpart->header_lines +
806               body->subpart->content_lines;
807 
808             /* Move any enclosing boundary information up to our level */
809             body->boundary_size = body->subpart->boundary_size;
810             body->boundary_lines = body->subpart->boundary_lines;
811 
812             /* it's nice to have a GUID for the message/rfc822 itself */
813             body_add_content_guid(base, body);
814         }
815     }
816     else {
817         if (!sawboundary) {
818             message_parse_content(msg, body, boundaries, efname);
819         }
820     }
821 
822     /* Free up boundary storage if necessary */
823     strarray_fini(&newboundaries);
824 
825     return 0;
826 }
827 
828 /*
829  * Parse the headers of a body-part
830  */
message_parse_headers(struct msg * msg,struct body * body,const char * defaultContentType,strarray_t * boundaries,const char * efname)831 static int message_parse_headers(struct msg *msg, struct body *body,
832                                  const char *defaultContentType,
833                                  strarray_t *boundaries,
834                                  const char *efname)
835 {
836     struct buf headers = BUF_INITIALIZER;
837     char *next;
838     int len;
839     int sawboundary = 0;
840     uint32_t maxlines = config_getint(IMAPOPT_MAXHEADERLINES);
841     int have_max = 0;
842     const char *value;
843 
844     body->header_offset = msg->offset;
845 
846     buf_putc(&headers, '\n');   /* Leading newline to prime the pump */
847 
848     /* Slurp up all of the headers into 'headers' */
849     while ((next = message_getline(&headers, msg)) &&
850            (next[-1] != '\n' ||
851             (*next != '\r' || next[1] != '\n'))) {
852 
853         len = strlen(next);
854 
855         if (next[-1] == '\n' && *next == '-' &&
856             message_pendingboundary(next, len, boundaries)) {
857             body->boundary_size = len;
858             body->boundary_lines++;
859             if (next - 1 > headers.s) {
860                 body->boundary_size += 2;
861                 body->boundary_lines++;
862                 next[-2] = '\0';
863             }
864             else {
865                 *next = '\0';
866             }
867             sawboundary = 1;
868             break;
869         }
870     }
871 
872     body->content_offset = msg->offset;
873     body->header_size = strlen(headers.s+1);
874 
875     /* Scan over the slurped-up headers for interesting header information */
876     body->header_lines = -1;    /* Correct for leading newline */
877     for (next = headers.s; *next; next++) {
878         if (*next == '\n') {
879             body->header_lines++;
880 
881             /* if we're skipping, skip now */
882             if (have_max) continue;
883 
884             /* check if we've hit a limit and flag it */
885             if (maxlines && body->header_lines > maxlines) {
886                 if (efname)
887                     syslog(LOG_ERR, "ERROR: message (%s) has more than %d header lines "
888                                     "not caching any more",
889                            efname, maxlines);
890                 else
891                     syslog(LOG_ERR, "ERROR: message has more than %d header lines "
892                                     "not caching any more",
893                            maxlines);
894                 have_max = 1;
895                 continue;
896             }
897 
898             if (/* space preallocated, i.e. must be top-level body */
899                 body->cacheheaders.s &&
900                 /* this is not a continuation line */
901                 (next[1] != ' ') && (next[1] != '\t') &&
902                 /* this header is supposed to be cached */
903                 mailbox_cached_header_inline(next+1) != BIT32_MAX) {
904                     /* append to the headers cache */
905                     message_parse_header(next+1, &body->cacheheaders);
906             }
907 
908             switch (message_header_lookup(next+1, &value)) {
909             case RFC822_BCC:
910                 message_parse_address(value, &body->bcc);
911                 break;
912             case RFC822_CC:
913                 message_parse_address(value, &body->cc);
914                 break;
915             case RFC822_CONTENT_DESCRIPTION:
916                 message_parse_string(value, &body->description);
917                 break;
918             case RFC822_CONTENT_DISPOSITION:
919                 message_parse_bodydisposition(value, body);
920                 break;
921             case RFC822_CONTENT_ID:
922                 message_parse_string(value, &body->id);
923                 break;
924             case RFC822_CONTENT_LANGUAGE:
925                 message_parse_language(value, &body->language);
926                 break;
927             case RFC822_CONTENT_LOCATION:
928                 message_parse_string(value, &body->location);
929                 break;
930             case RFC822_CONTENT_MD5:
931                 message_parse_string(value, &body->md5);
932                 break;
933             case RFC822_CONTENT_TRANSFER_ENCODING:
934                 message_parse_encoding(value, &body->encoding);
935 
936                 /* If we're encoding binary, replace "binary"
937                    with "base64" in CTE header body */
938                 if (msg->encode &&
939                     !strcmpsafe(body->encoding, "BINARY")) {
940                     char *p = (char*)
941                         stristr(msg->base + body->header_offset +
942                                 (next - headers.s) + 27,
943                                 "binary");
944                     memcpy(p, "base64", 6);
945                 }
946                 break;
947             case RFC822_CONTENT_TYPE:
948                 message_parse_bodytype(value, body);
949                 break;
950             case RFC822_DATE:
951                 message_parse_string(value, &body->date);
952                 break;
953             case RFC822_FROM:
954                 message_parse_address(value, &body->from);
955                 break;
956             case RFC822_IN_REPLY_TO:
957                 message_parse_string(value, &body->in_reply_to);
958                 break;
959             case RFC822_MESSAGE_ID:
960                 message_parse_string(value, &body->message_id);
961                 break;
962             case RFC822_REPLY_TO:
963                 message_parse_address(value, &body->reply_to);
964                 break;
965             case RFC822_RECEIVED:
966                 message_parse_received_date(value, &body->received_date);
967                 break;
968             case RFC822_REFERENCES:
969                 message_parse_string(value, &body->references);
970                 break;
971             case RFC822_SUBJECT:
972                 message_parse_string(value, &body->subject);
973                 break;
974             case RFC822_SENDER:
975                 message_parse_address(value, &body->sender);
976                 break;
977             case RFC822_TO:
978                 message_parse_address(value, &body->to);
979                 break;
980             case RFC822_X_DELIVEREDINTERNALDATE:
981                 /* Explicit x-deliveredinternaldate overrides received: headers */
982                 message_parse_string(value, &body->x_deliveredinternaldate);
983                 break;
984             case RFC822_X_ME_MESSAGE_ID:
985                 message_parse_string(value, &body->x_me_message_id);
986                 break;
987             default:
988                 break;
989             } /* switch() */
990         } /* if (*next == '\n') */
991     }
992 
993     /* If didn't find Content-Type: header, use the passed-in default type */
994     if (!body->type) {
995         message_parse_bodytype(defaultContentType, body);
996     }
997     buf_free(&headers);
998     return sawboundary;
999 }
1000 
/*
 * Parse a list of RFC 822 addresses from a header
 *
 * 'hdr' points at the header value.  The value runs up to the first
 * newline that is not followed by a space or tab (so continuation
 * lines are included), or to the terminating NUL when there is no such
 * newline.  Any list already stored in '*addrp' is freed first; the
 * value is then handed to parseaddr_list().
 *
 * The text behind 'hdr' is temporarily NUL-terminated in place while
 * parseaddr_list() runs and restored afterwards, so it must be
 * writable despite the const qualifier.
 */
static void message_parse_address(const char *hdr, struct address **addrp)
{
    char *hdrend, hdrendchar = '\0';

    /* If we saw this header already, discard the earlier value */
    if (*addrp) {
        parseaddr_free(*addrp);
        *addrp = NULL;
    }

    /* Find end of header.  The search starts at 'hdr' itself: an empty
     * value is then never scanned past its terminating NUL, and a value
     * that begins with the line terminator ends right there instead of
     * swallowing the following header line. */
    hdrend = strchr(hdr, '\n');
    while (hdrend && (hdrend[1] == ' ' || hdrend[1] == '\t')) {
        hdrend = strchr(hdrend + 1, '\n');
    }

    /* Put a NUL character at the end of header */
    /* gnb:TODO this is evil and should be stopped */
    if (hdrend) {
        if (hdrend > hdr && hdrend[-1] == '\r') hdrend--;
        hdrendchar = *hdrend;
        *hdrend = '\0';
    }

    parseaddr_list(hdr, addrp);

    /* Put character at end of header back */
    if (hdrend) *hdrend = hdrendchar;
}
1033 
1034 /*
1035  * Parse a Content-Transfer-Encoding from a header.
1036  */
message_parse_encoding(const char * hdr,char ** hdrp)1037 static void message_parse_encoding(const char *hdr, char **hdrp)
1038 {
1039     int len;
1040     const char *p;
1041 
1042     /* If we saw this header already, discard the earlier value */
1043     if (*hdrp) {
1044         free(*hdrp);
1045         *hdrp = NULL;
1046     }
1047 
1048     /* Skip leading whitespace, ignore header if blank */
1049     message_parse_rfc822space(&hdr);
1050     if (!hdr) return;
1051 
1052     /* Find end of encoding token */
1053     for (p = hdr; *p && !Uisspace(*p) && *p != '('; p++) {
1054         if (*p < ' ' || strchr(MIME_TSPECIALS, *p)) return;
1055     }
1056     len = p - hdr;
1057 
1058     /* Skip trailing whitespace, ignore header if trailing garbage */
1059     message_parse_rfc822space(&p);
1060     if (p) return;
1061 
1062     /* Save encoding token */
1063     *hdrp = message_ucase(xstrndup(hdr, len));
1064 }
1065 
1066 /*
1067  * parse a charset and encoding out of a body structure
1068  */
message_parse_charset(const struct body * body,int * e_ptr,charset_t * c_ptr)1069 static void message_parse_charset(const struct body *body,
1070                                   int *e_ptr, charset_t *c_ptr)
1071 {
1072 
1073     int encoding = ENCODING_NONE;
1074     charset_t charset = charset_lookupname("us-ascii");
1075     struct param *param;
1076 
1077 
1078     if (body->encoding) {
1079         switch (body->encoding[0]) {
1080         case '7':
1081         case '8':
1082             if (!strcmp(body->encoding+1, "BIT"))
1083                 encoding = ENCODING_NONE;
1084             else
1085                 encoding = ENCODING_UNKNOWN;
1086             break;
1087 
1088         case 'B':
1089             if (!strcmp(body->encoding, "BASE64"))
1090                 encoding = ENCODING_BASE64;
1091             else if (!strcmp(body->encoding, "BINARY"))
1092                 encoding = ENCODING_NONE;
1093             else
1094                 encoding = ENCODING_UNKNOWN;
1095             break;
1096 
1097         case 'Q':
1098             if (!strcmp(body->encoding, "QUOTED-PRINTABLE"))
1099                 encoding = ENCODING_QP;
1100             else
1101                 encoding = ENCODING_UNKNOWN;
1102             break;
1103 
1104         default:
1105             encoding = ENCODING_UNKNOWN;
1106         }
1107     }
1108 
1109     if (!body->type || !strcmp(body->type, "TEXT")) {
1110         for (param = body->params; param; param = param->next) {
1111             if (!strcasecmp(param->attribute, "charset")) {
1112                 if (param->value && *param->value) {
1113                     charset_free(&charset);
1114                     charset = charset_lookupname(param->value);
1115                     if (charset == CHARSET_UNKNOWN_CHARSET)
1116                         syslog(LOG_NOTICE, "message_parse_charset: unknown charset %s for text/%s", param->value, body->subtype);
1117                 }
1118                 break;
1119             }
1120         }
1121     }
1122     else if (!strcmp(body->type, "MESSAGE")) {
1123         if (!strcmp(body->subtype, "RFC822")) {
1124             charset_free(&charset);
1125             charset = CHARSET_UNKNOWN_CHARSET;
1126         }
1127         encoding = ENCODING_NONE;
1128     }
1129     else {
1130         charset_free(&charset);
1131         charset = CHARSET_UNKNOWN_CHARSET;
1132     }
1133 
1134     if (e_ptr) *e_ptr = encoding;
1135     if (c_ptr) *c_ptr = charset;
1136     else charset_free(&charset);
1137 }
1138 
1139 /*
1140  * Parse an uninterpreted header
1141  */
message_parse_string(const char * hdr,char ** hdrp)1142 EXPORTED void message_parse_string(const char *hdr, char **hdrp)
1143 {
1144     const char *hdrend;
1145     char *he;
1146 
1147     /* If we saw this header already, discard the earlier value */
1148     if (*hdrp) {
1149         free(*hdrp);
1150         *hdrp = NULL;
1151     }
1152 
1153     /* Skip initial whitespace */
1154     while (*hdr == ' ' || *hdr == '\t') hdr++;
1155 
1156     /* Find end of header */
1157     hdrend = hdr;
1158     do {
1159         hdrend = strchr(hdrend+1, '\n');
1160     } while (hdrend && (hdrend[1] == ' ' || hdrend[1] == '\t'));
1161     if (hdrend) {
1162         if (hdrend > hdr && hdrend[-1] == '\r') hdrend--;
1163     }
1164     else {
1165         hdrend = hdr + strlen(hdr);
1166     }
1167 
1168     /* Save header value */
1169     *hdrp = xstrndup(hdr, (hdrend - hdr));
1170 
1171     /* Un-fold header (overlapping buffers, use memmove) */
1172     he = *hdrp;
1173     while ((he = strchr(he, '\n'))!=NULL) {
1174         if (he > *hdrp && he[-1] == '\r') {
1175             he--;
1176             memmove(he, he+2, strlen(he+2)+1);
1177         }
1178         else {
1179             memmove(he, he+1, strlen(he+1)+1);
1180         }
1181     }
1182 }
1183 
/*
 * Cache a header
 *
 * 'hdr' points at the start of a header line.  The header, including
 * any continuation lines but without its final line terminator, is
 * appended to 'buf' followed by CRLF.
 */
static void
message_parse_header(const char *hdr, struct buf *buf)
{
    const char *eol;
    int nbytes;

    /* The header ends at the first newline (looking past the first
     * character) that is not followed by a space or tab */
    eol = strchr(hdr + 1, '\n');
    while (eol && (eol[1] == ' ' || eol[1] == '\t')) {
        eol = strchr(eol + 1, '\n');
    }

    if (!eol) {
        /* No terminating newline: take everything up to the NUL */
        eol = hdr + strlen(hdr);
    }
    else if (eol > hdr && eol[-1] == '\r') {
        /* Leave the CR of a CRLF terminator out of the copy */
        eol--;
    }

    /* Copy the header and give it a canonical CRLF ending */
    nbytes = eol - hdr;
    buf_appendmap(buf, hdr, nbytes);
    buf_putc(buf, '\r');
    buf_putc(buf, '\n');
}
1211 
1212 /*
1213  * Parse a Content-Type from a header.
1214  */
message_parse_type(const char * hdr,char ** typep,char ** subtypep,struct param ** paramp)1215 EXPORTED void message_parse_type(const char *hdr, char **typep, char **subtypep, struct param **paramp)
1216 {
1217     const char *type;
1218     int typelen;
1219     const char *subtype;
1220     int subtypelen;
1221     char *decbuf = NULL;
1222 
1223     /* Skip leading whitespace, ignore header if blank */
1224     message_parse_rfc822space(&hdr);
1225     if (!hdr) return;
1226 
1227     /* Very old versions of macOS Mail.app encode the Content-Type header
1228      * in MIME words, if the attachment name contains non-ASCII characters */
1229     if (strlen(hdr) > 2 && hdr[0] == '=' && hdr[1] == '?') {
1230         int flags = CHARSET_KEEPCASE;
1231         decbuf = charset_decode_mimeheader(hdr, flags);
1232         if (strcmpsafe(decbuf, hdr)) hdr = decbuf;
1233     }
1234 
1235     /* Find end of type token */
1236     type = hdr;
1237     for (; *hdr && !Uisspace(*hdr) && *hdr != '/' && *hdr != '('; hdr++) {
1238         if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) goto done;
1239     }
1240     typelen = hdr - type;
1241 
1242     /* Skip whitespace after type */
1243     message_parse_rfc822space(&hdr);
1244     if (!hdr) goto done;
1245 
1246     /* Ignore header if no '/' character */
1247     if (*hdr++ != '/') goto done;
1248 
1249     /* Skip whitespace before subtype, ignore header if no subtype */
1250     message_parse_rfc822space(&hdr);
1251     if (!hdr) return;
1252 
1253     /* Find end of subtype token */
1254     subtype = hdr;
1255     for (; *hdr && !Uisspace(*hdr) && *hdr != ';' && *hdr != '('; hdr++) {
1256         if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) goto done;
1257     }
1258     subtypelen = hdr - subtype;
1259 
1260     /* Skip whitespace after subtype */
1261     message_parse_rfc822space(&hdr);
1262 
1263     /* Ignore header if not at end of header or parameter delimiter */
1264     if (hdr && *hdr != ';') goto done;
1265 
1266     /* Save content type & subtype */
1267     *typep = message_ucase(xstrndup(type, typelen));
1268     *subtypep = message_ucase(xstrndup(subtype, subtypelen));
1269 
1270     /* Parse parameter list */
1271     if (hdr) {
1272         message_parse_params(hdr+1, paramp);
1273         message_fold_params(paramp);
1274         if (decbuf && paramp && *paramp) {
1275             /* The type header was erroneously encoded as a RFC 2407 encoded word
1276              * (rather than encoding its attributes), and the parameter values
1277              * might now contain non-ASCII characters. Let's reencode them. */
1278             struct param *param = *paramp;
1279             for (; param; param = param->next) {
1280                 const char *attr = param->attribute;
1281                 /* Skip extended parameters */
1282                 size_t attrlen = strlen(attr);
1283                 if (!attrlen || attr[attrlen-1] == '*') continue;
1284                 /* Check if the parameter value has non-ASCII characters */
1285                 int has_highbit = 0;
1286                 const char *val = param->value;
1287                 for (val = param->value; *val && !has_highbit; val++) {
1288                     has_highbit = *val & 0x80;
1289                 }
1290                 if (!has_highbit) continue;
1291                 /* Reencode the parameter value */
1292                 char *encvalue = charset_encode_mimeheader(param->value, strlen(param->value), 0);
1293                 if (encvalue) {
1294                     free(param->value);
1295                     param->value = encvalue;
1296                 }
1297             }
1298         }
1299     }
1300 
1301 done:
1302     free(decbuf);
1303 }
1304 
message_parse_bodytype(const char * hdr,struct body * body)1305 static void message_parse_bodytype(const char *hdr, struct body *body)
1306 {
1307     /* If we saw this header already, discard the earlier value */
1308     if (body->type) {
1309         free(body->type);
1310         free(body->subtype);
1311         body->type = body->subtype = NULL;
1312         param_free(&body->params);
1313     }
1314 
1315     message_parse_type(hdr, &body->type, &body->subtype, &body->params);
1316 }
1317 
1318 /*
1319  * Parse a Content-Disposition from a header.
1320  */
message_parse_disposition(const char * hdr,char ** hdrp,struct param ** paramp)1321 EXPORTED void message_parse_disposition(const char *hdr, char **hdrp, struct param **paramp)
1322 {
1323     const char *disposition;
1324     int dispositionlen;
1325 
1326     /* Skip leading whitespace, ignore header if blank */
1327     message_parse_rfc822space(&hdr);
1328     if (!hdr) return;
1329 
1330     /* Find end of disposition token */
1331     disposition = hdr;
1332     for (; *hdr && !Uisspace(*hdr) && *hdr != ';' && *hdr != '('; hdr++) {
1333         if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) return;
1334     }
1335     dispositionlen = hdr - disposition;
1336 
1337     /* Skip whitespace after type */
1338     message_parse_rfc822space(&hdr);
1339 
1340     /* Ignore header if not at end of header or parameter delimiter */
1341     if (hdr && *hdr != ';') return;
1342 
1343     /* Save content disposition */
1344     *hdrp = message_ucase(xstrndup(disposition, dispositionlen));
1345 
1346     /* Parse parameter list */
1347     if (hdr) {
1348         message_parse_params(hdr+1, paramp);
1349         message_fold_params(paramp);
1350     }
1351 }
1352 
1353 /*
1354  * Parse a Content-Disposition from a header.
1355  */
message_parse_bodydisposition(const char * hdr,struct body * body)1356 static void message_parse_bodydisposition(const char *hdr, struct body *body)
1357 {
1358     /* If we saw this header already, discard the earlier value */
1359     if (body->disposition) {
1360         free(body->disposition);
1361         body->disposition = NULL;
1362         param_free(&body->disposition_params);
1363     }
1364 
1365     message_parse_disposition(hdr, &body->disposition, &body->disposition_params);
1366 }
1367 
1368 /*
1369  * Parse a parameter list from a header.
1370  *
1371  * 'hdr' points into the message, and is not expected to
1372  * be nul-terminated.  Handles continuation headers.
1373  *
1374  * Malformed parameters are handled by skipping to the
1375  * next ';' or end of line, which should mark the next
1376  * parameter.
1377  */
message_parse_params(const char * hdr,struct param ** paramp)1378 static void message_parse_params(const char *hdr, struct param **paramp)
1379 {
1380     struct param *param;
1381     const char *attribute;
1382     int attributelen;
1383     const char *value;
1384     int valuelen;
1385     char *p;
1386 
1387     for (;;) {
1388         /* Skip over leading whitespace */
1389         message_parse_rfc822space(&hdr);
1390         if (!hdr) return;
1391 
1392         /* Find end of attribute */
1393         attribute = hdr;
1394         for (; *hdr && !Uisspace(*hdr) && *hdr != '=' && *hdr != '('; hdr++) {
1395             if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) goto skip;
1396         }
1397         attributelen = hdr - attribute;
1398 
1399         /* Skip whitespace after attribute */
1400         message_parse_rfc822space(&hdr);
1401         if (!hdr) return;
1402 
1403         /* Ignore param if no '=' character */
1404         if (*hdr++ != '=') goto skip;
1405 
1406         /* Skip whitespace before value */
1407         message_parse_rfc822space(&hdr);
1408         if (!hdr) return;
1409 
1410         /* Find end of value */
1411         value = hdr;
1412         if (*hdr == '\"') {
1413             /* Parse quoted-string */
1414             hdr++;
1415             while (*hdr && *hdr != '\"') {
1416                 if (*hdr == '\\') {
1417                     hdr++;
1418                     if (!*hdr) return;
1419                 }
1420                 if (*hdr == '\r') {
1421                     /* check for continuation headers */
1422                     if (hdr[1] == '\n' && (hdr[2] == ' ' || hdr[2] == '\t')) hdr += 2;
1423                     else return;    /* end of header field */
1424                 }
1425                 hdr++;
1426             }
1427             if (!*hdr++) return;
1428         }
1429         else {
1430             /* Parse token (leniently allow space and tspecials) */
1431             const char *endval = hdr;
1432             while (*hdr && *hdr != ';' && *hdr != '(') {
1433                 if (*hdr == '\r') {
1434                     /* Skip FWS and stop at CRLF */
1435                     if (hdr[1] == '\n' && (hdr[2] == ' ' || hdr[2] == '\t')) {
1436                         hdr += 2;
1437                         continue;
1438                     }
1439                     else break;
1440                 }
1441                 if (*hdr & 0x80) {
1442                     /* Allow unencoded non-ASCII characters */
1443                     /* XXX  We should probably make sure this is valid UTF-8 */
1444                 }
1445                 else if (*hdr < ' ' && *hdr != '\t') {
1446                     /* Reject control characters */
1447                     goto skip;
1448                 }
1449                 if (*hdr != ' ' && *hdr != '\t') {
1450                     /* Keep last non-WSP position */
1451                     endval = hdr;
1452                 }
1453                 hdr++;
1454             }
1455             /* Right-strip white space */
1456             hdr = endval + 1;
1457         }
1458         valuelen = hdr - value;
1459 
1460         /* Skip whitespace after value */
1461         message_parse_rfc822space(&hdr);
1462 
1463         /* Ignore parameter if not at end of header or parameter delimiter */
1464         if (hdr && *hdr++ != ';') {
1465 skip:
1466             hdr += strcspn(hdr, ";\r\n");
1467             if (*hdr == ';') hdr++;
1468             continue;
1469         }
1470 
1471         /* Save attribute/value pair */
1472         *paramp = param = (struct param *)xzmalloc(sizeof(struct param));
1473         param->attribute = message_ucase(xstrndup(attribute, attributelen));
1474         param->value = xzmalloc(valuelen + 1);  /* xzmalloc for trailing NUL */
1475         if (*value == '\"') {
1476             p = param->value;
1477             value++;
1478             while (*value != '\"') {
1479                 if (*value == '\\') value++;
1480                 else if (*value == '\r') value += 2;
1481                 *p++ = *value++;
1482             }
1483             *p = '\0';
1484         }
1485         else {
1486             memcpy(param->value, value, valuelen);
1487         }
1488 
1489         /* Get ready to parse the next parameter */
1490         paramp = &param->next;
1491     }
1492 }
1493 
1494 /*
1495  * Decode RFC 2231 parameter continuations
1496  *
1497  * Algorithm: Run down the list of parameters looking for
1498  * an attribute of the form "foo*0" or "foo*0*".  When we find
1499  * such an attribute, we look for "foo*1"/"foo*1*", "foo*2"/"foo*2*"
1500  * etc, appending each value to that of "foo*0" and then removing the
1501  * parameter we just appended from the list.  When appending values,
1502  * if either parameter has extended syntax, we have to convert the other
1503  * value from simple to extended syntax.  At the end, we change the name
1504  * of "foo*0"/"foo*0*" to either "foo" or "foo*", depending on whether
1505  * the value has extended syntax or not.
1506  */
message_fold_params(struct param ** params)1507 static void message_fold_params(struct param **params)
1508 {
1509     struct param *thisparam;    /* The "foo*1" param we're folding */
1510     struct param **continuation; /* Pointer to the "foo*2" param */
1511     struct param *tmpparam;     /* Placeholder for removing "foo*2" */
1512     char *asterisk;
1513     int section;
1514     int is_extended;
1515     char sectionbuf[5];
1516     int attributelen, sectionbuflen;
1517     char *from, *to;
1518 
1519     for (thisparam = *params; thisparam; thisparam = thisparam->next) {
1520         asterisk = strchr(thisparam->attribute, '*');
1521         if (asterisk && asterisk[1] == '0' &&
1522             (!asterisk[2] || (asterisk[2] == '*' && !asterisk[3]))) {
1523             /* An initial section.  Find and collect the rest */
1524             is_extended = (asterisk[2] == '*');
1525             *asterisk = '\0';
1526             attributelen = asterisk - thisparam->attribute;
1527             section = 1;
1528             for (;;) {
1529                 if (section == 100) break;
1530                 sectionbuf[0] = '*';
1531                 if (section > 9) {
1532                     sectionbuf[1] = section/10 + '0';
1533                     sectionbuf[2] = section%10 + '0';
1534                     sectionbuf[3] = '\0';
1535                     sectionbuflen = 3;
1536                 }
1537                 else {
1538                     sectionbuf[1] = section + '0';
1539                     sectionbuf[2] = '\0';
1540                     sectionbuflen = 2;
1541                 }
1542 
1543                 /* Find the next continuation */
1544                 for (continuation = params; *continuation;
1545                      continuation = &((*continuation)->next)) {
1546                     if (!strncmp((*continuation)->attribute, thisparam->attribute,
1547                                  attributelen) &&
1548                         !strncmp((*continuation)->attribute + attributelen,
1549                                  sectionbuf, sectionbuflen) &&
1550                         ((*continuation)->attribute[attributelen+sectionbuflen] == '\0' ||
1551                          ((*continuation)->attribute[attributelen+sectionbuflen] == '*' && (*continuation)->attribute[attributelen+sectionbuflen+1] == '\0'))) {
1552                         break;
1553                     }
1554                 }
1555 
1556                 /* No more continuations to find */
1557                 if (!*continuation) break;
1558 
1559                 if ((*continuation)->attribute[attributelen+sectionbuflen] == '\0') {
1560                     /* Continuation is simple */
1561                     if (is_extended) {
1562                         /* Have to re-encode continuation value */
1563                         thisparam->value =
1564                             xrealloc(thisparam->value,
1565                                      strlen(thisparam->value) +
1566                                      3*strlen((*continuation)->value) + 1);
1567                         from = (*continuation)->value;
1568                         to = thisparam->value + strlen(thisparam->value);
1569                         while (*from) {
1570                             if (*from <= ' ' || *from >= 0x7f ||
1571                                 *from == '*' || *from == '\'' ||
1572                                 *from == '%' || strchr(MIME_TSPECIALS, *from)) {
1573                                 *to++ = '%';
1574                                 to += bin_to_hex(from, 1, to, BH_UPPER);
1575                             } else {
1576                                 *to++ = *from;
1577                             }
1578                             from++;
1579                         }
1580                         *to++ = '\0';
1581                     }
1582                     else {
1583                         thisparam->value =
1584                             xrealloc(thisparam->value,
1585                                      strlen(thisparam->value) +
1586                                      strlen((*continuation)->value) + 1);
1587                         from = (*continuation)->value;
1588                         to = thisparam->value + strlen(thisparam->value);
1589                         while ((*to++ = *from++)!= 0)
1590                             { }
1591                     }
1592                 }
1593                 else {
1594                     /* Continuation is extended */
1595                     if (is_extended) {
1596                         thisparam->value =
1597                             xrealloc(thisparam->value,
1598                                      strlen(thisparam->value) +
1599                                      strlen((*continuation)->value) + 1);
1600                         from = (*continuation)->value;
1601                         to = thisparam->value + strlen(thisparam->value);
1602                         while ((*to++ = *from++) != 0)
1603                             { }
1604                     }
1605                     else {
1606                         /* Have to re-encode thisparam value */
1607                         char *tmpvalue =
1608                             xmalloc(2 + 3*strlen(thisparam->value) +
1609                                     strlen((*continuation)->value) + 1);
1610 
1611                         from = thisparam->value;
1612                         to = tmpvalue;
1613                         *to++ = '\''; /* Unspecified charset */
1614                         *to++ = '\''; /* Unspecified language */
1615                         while (*from) {
1616                             if (*from <= ' ' || *from >= 0x7f ||
1617                                 *from == '*' || *from == '\'' ||
1618                                 *from == '%' || strchr(MIME_TSPECIALS, *from)) {
1619                                 *to++ = '%';
1620                                 to += bin_to_hex(from, 1, to, BH_UPPER);
1621                             } else {
1622                                 *to++ = *from;
1623                             }
1624                             from++;
1625                         }
1626                         from = (*continuation)->value;
1627 
1628                         while ((*to++ = *from++)!=0)
1629                             { }
1630 
1631                         free(thisparam->value);
1632                         thisparam->value = tmpvalue;
1633                         is_extended = 1;
1634                     }
1635                 }
1636 
1637                 /* Remove unneeded continuation */
1638                 free((*continuation)->attribute);
1639                 free((*continuation)->value);
1640                 tmpparam = *continuation;
1641                 *continuation = (*continuation)->next;
1642                 free(tmpparam);
1643                 section++;
1644             }
1645 
1646             /* Fix up attribute name */
1647             if (is_extended) {
1648                 asterisk[0] = '*';
1649                 asterisk[1] = '\0';
1650             } else {
1651                 asterisk[0] = '\0';
1652             }
1653         }
1654     }
1655 }
1656 
1657 
1658 /*
1659  * Parse a language list from a header
1660  */
message_parse_language(const char * hdr,struct param ** paramp)1661 static void message_parse_language(const char *hdr, struct param **paramp)
1662 {
1663     struct param *param;
1664     const char *value;
1665     int valuelen;
1666 
1667     /* If we saw this header already, discard the earlier value */
1668     if (*paramp) param_free(paramp);
1669 
1670     for (;;) {
1671         /* Skip over leading whitespace */
1672         message_parse_rfc822space(&hdr);
1673         if (!hdr) return;
1674 
1675         /* Skip whitespace before value */
1676         message_parse_rfc822space(&hdr);
1677         if (!hdr) return;
1678 
1679         /* Find end of value */
1680         value = hdr;
1681         for (; *hdr && !Uisspace(*hdr) && *hdr != ',' && *hdr != '('; hdr++) {
1682             if (*hdr != '-' && !Uisalpha((*hdr))) return;
1683         }
1684         valuelen = hdr - value;
1685 
1686         /* Skip whitespace after value */
1687         message_parse_rfc822space(&hdr);
1688 
1689         /* Ignore parameter if not at end of header or language delimiter */
1690         if (hdr && *hdr++ != ',') return;
1691 
1692         /* Save value pair */
1693         *paramp = param = (struct param *)xzmalloc(sizeof(struct param));
1694         param->value = message_ucase(xstrndup(value, valuelen));
1695 
1696         /* Get ready to parse the next parameter */
1697         paramp = &param->next;
1698     }
1699 }
1700 
/*
 * Skip over RFC 822 whitespace and comments
 *
 * On entry '*s' points into a header value (or is NULL, in which case
 * nothing happens).  On return '*s' points at the first character
 * that is neither whitespace nor part of a parenthesised comment, or
 * is set to NULL when the header field ends first: at a NUL, at a
 * newline that is not followed by a space or tab (i.e. no folded
 * continuation line), or inside an unterminated comment.
 *
 * Comments may nest, and a backslash inside a comment quotes the
 * following character.
 */
static void message_parse_rfc822space(const char **s)
{
    const char *p = *s;
    int commentlevel = 0;

    if (!p) return;
    while (*p && (Uisspace(*p) || *p == '(')) {
        if (*p == '\n') {
            p++;
            if (*p != ' ' && *p != '\t') {
                *s = 0;     /* end of header field, no continuation */
                return;
            }
        }
        else if (*p == '(') {
            p++;
            commentlevel++;
            while (commentlevel) {
                switch (*p) {
                case '\n':
                    p++;
                    if (*p == ' ' || *p == '\t') break;
                    /* FALL THROUGH */
                case '\0':
                    *s = 0;
                    return;

                case '\\':
                    p++;
                    /* A backslash directly before the terminating NUL
                     * quotes nothing; stop here rather than stepping
                     * over the terminator and scanning past the end
                     * of the string. */
                    if (*p == '\0') {
                        *s = 0;
                        return;
                    }
                    break;

                case '(':
                    commentlevel++;
                    break;

                case ')':
                    commentlevel--;
                    break;

                default:
                    break;
                }
                p++;
            }
        }
        else p++;
    }
    if (*p == 0) {
        *s = 0;     /* embedded NUL */
    }
    else {
        *s = p;
    }
}
1755 
1756 /*
1757  * Parse the content of a MIME multipart body-part
1758  */
message_parse_multipart(struct msg * msg,struct body * body,strarray_t * boundaries,const char * efname)1759 static void message_parse_multipart(struct msg *msg, struct body *body,
1760                                     strarray_t *boundaries, const char *efname)
1761 {
1762     struct body preamble, epilogue;
1763     struct param *boundary;
1764     const char *defaultContentType = DEFAULT_CONTENT_TYPE;
1765     int i, depth;
1766     int limit = config_getint(IMAPOPT_BOUNDARY_LIMIT);
1767 
1768     memset(&preamble, 0, sizeof(struct body));
1769     memset(&epilogue, 0, sizeof(struct body));
1770     if (strcmp(body->subtype, "DIGEST") == 0) {
1771         defaultContentType = "MESSAGE/RFC822";
1772     }
1773 
1774     /* Find boundary id */
1775     boundary = body->params;
1776     while (boundary &&
1777            strcmp(boundary->attribute, "BOUNDARY") != 0 &&
1778            strcmp(boundary->attribute, "BOUNDARY*") != 0) {
1779         boundary = boundary->next;
1780     }
1781 
1782     if (!boundary) {
1783         /* Invalid MIME--treat as zero-part multipart */
1784         message_parse_content(msg, body, boundaries, efname);
1785         return;
1786     }
1787 
1788     /* Add the new boundary id */
1789     char *id = NULL;
1790     if (boundary->attribute[8] == '*') {
1791         /* Decode boundary id */
1792         id = charset_parse_mimexvalue(boundary->value, NULL);
1793     }
1794     if (!id) id = xstrdup(boundary->value);
1795     strarray_appendm(boundaries, id);
1796     depth = boundaries->count;
1797 
1798     /* Parse preamble */
1799     message_parse_content(msg, &preamble, boundaries, efname);
1800 
1801     /* Parse the component body-parts */
1802     while (boundaries->count == depth &&
1803             (limit == 0 ? 1 : boundaries->count < limit)) {
1804         body->subpart = (struct body *)xrealloc((char *)body->subpart,
1805                                  (body->numparts+1)*sizeof(struct body));
1806         message_parse_body(msg, &body->subpart[body->numparts],
1807                            defaultContentType, boundaries, efname);
1808         if (msg->offset == msg->len &&
1809             body->subpart[body->numparts].boundary_size == 0) {
1810             /* hit the end of the message, therefore end all pending
1811                multiparts */
1812             strarray_truncate(boundaries, 0);
1813         }
1814         body->numparts++;
1815     }
1816 
1817     if (boundaries->count == depth-1) {
1818         /* Parse epilogue */
1819         message_parse_content(msg, &epilogue, boundaries, efname);
1820     }
1821     else if (body->numparts) {
1822         /*
1823          * We hit the boundary of an enclosing multipart while parsing
1824          * a component body-part.  Move the enclosing boundary information
1825          * up to our level.
1826          */
1827         body->boundary_size = body->subpart[body->numparts-1].boundary_size;
1828         body->boundary_lines = body->subpart[body->numparts-1].boundary_lines;
1829         body->subpart[body->numparts-1].boundary_size = 0;
1830         body->subpart[body->numparts-1].boundary_lines = 0;
1831     }
1832     else {
1833         /*
1834          * We hit the boundary of an enclosing multipart while parsing
1835          * the preamble.  Move the enclosing boundary information
1836          * up to our level.
1837          */
1838         body->boundary_size = preamble.boundary_size;
1839         body->boundary_lines = preamble.boundary_lines;
1840         preamble.boundary_size = 0;
1841         preamble.boundary_lines = 0;
1842     }
1843 
1844     /*
1845      * Calculate our size/lines information
1846      */
1847     body->content_size = preamble.content_size + preamble.boundary_size;
1848     body->content_lines = preamble.content_lines + preamble.boundary_lines;
1849     for (i=0; i< body->numparts; i++) {
1850         body->content_size += body->subpart[i].header_size +
1851           body->subpart[i].content_size +
1852           body->subpart[i].boundary_size;
1853         body->content_lines += body->subpart[i].header_lines +
1854           body->subpart[i].content_lines +
1855           body->subpart[i].boundary_lines;
1856     }
1857     body->content_size += epilogue.content_size;
1858     body->content_lines += epilogue.content_lines;
1859 
1860     /*
1861      * Move any enclosing boundary information up to our level.
1862      */
1863     body->boundary_size += epilogue.boundary_size;
1864     body->boundary_lines += epilogue.boundary_lines;
1865 
1866     /* check if we've hit a limit and flag it */
1867     if (limit && depth == limit) {
1868         if (efname)
1869             syslog(LOG_ERR, "ERROR: mime boundary limit %i exceeded, "
1870                             "not parsing anymore (%s)",
1871                    limit, efname);
1872         else
1873             syslog(LOG_ERR, "ERROR: mime boundary limit %i exceeded, "
1874                             "not parsing anymore",
1875                    limit);
1876     }
1877 }
1878 
1879 /*
1880  * Parse the content of a generic body-part
1881  */
message_parse_content(struct msg * msg,struct body * body,strarray_t * boundaries,const char * efname)1882 static void message_parse_content(struct msg *msg, struct body *body,
1883                                   strarray_t *boundaries,
1884                                   const char *efname __attribute__((unused)))
1885 {
1886     const char *line, *endline;
1887     unsigned long s_offset = msg->offset;
1888     int encode;
1889     int len;
1890 
1891     /* Should we encode a binary part? */
1892     encode = msg->encode &&
1893         body->encoding && !strcasecmp(body->encoding, "binary");
1894 
1895     while (msg->offset < msg->len) {
1896         line = msg->base + msg->offset;
1897         endline = memchr(line, '\n', msg->len - msg->offset);
1898         if (endline) {
1899             endline++;
1900         }
1901         else {
1902             endline = msg->base + msg->len;
1903         }
1904         len = endline - line;
1905         msg->offset += len;
1906 
1907         if (line[0] == '-' && line[1] == '-' &&
1908             message_pendingboundary(line, len, boundaries)) {
1909             body->boundary_size = len;
1910             body->boundary_lines++;
1911             if (body->content_lines) {
1912                 body->content_lines--;
1913                 body->boundary_lines++;
1914             }
1915             if (body->content_size > 1) {
1916                 body->content_size -= 2;
1917                 body->boundary_size += 2;
1918             }
1919             break;
1920         }
1921 
1922         body->content_size += len;
1923 
1924         /* Count the content lines, unless we're encoding
1925            (we always count blank lines) */
1926         if (endline[-1] == '\n' &&
1927             (!encode || line[0] == '\r')) {
1928             body->content_lines++;
1929         }
1930     }
1931 
1932     if (encode) {
1933         size_t b64_size;
1934         int b64_lines, delta;
1935 
1936         /* Determine encoded size */
1937         charset_encode_mimebody(NULL, body->content_size, NULL,
1938                                 &b64_size, NULL, 1 /* wrap */);
1939 
1940         delta = b64_size - body->content_size;
1941 
1942         /* Realloc buffer to accomodate encoding overhead */
1943         msg->base = xrealloc((char*) msg->base, msg->len + delta);
1944 
1945         /* Shift content and remaining data by delta */
1946         memmove((char*) msg->base + s_offset + delta, msg->base + s_offset,
1947                 msg->len - s_offset);
1948 
1949         /* Encode content into buffer at current position */
1950         charset_encode_mimebody(msg->base + s_offset + delta,
1951                                 body->content_size,
1952                                 (char*) msg->base + s_offset,
1953                                 NULL, &b64_lines, 1 /* wrap */);
1954 
1955         /* Adjust buffer position and length to account for encoding */
1956         msg->offset += delta;
1957         msg->len += delta;
1958 
1959         /* Adjust body structure to account for encoding */
1960         free(body->encoding);
1961         body->encoding = xstrdup("BASE64");
1962         body->content_size = b64_size;
1963         body->content_lines += b64_lines;
1964     }
1965 
1966     body_add_content_guid(msg->base + s_offset, body);
1967 }
1968 
/*
 * Extract the date part of a Received header.
 *
 * 'hdr' is the header value and '*hdrp' receives a newly allocated
 * string.  Only the first Received header seen is used: if '*hdrp' is
 * already set the call does nothing.
 *
 * From rfc2822, 3.6.7
 *   received = "Received:" name-val-list ";" date-time CRLF
 * so everything after the last ';' is taken to be the date.  When the
 * header contains no ';' at all, the whole header text is stored in
 * '*hdrp' so that later Received headers are still ignored; failed
 * date parsing elsewhere is expected to fall back to the current time.
 */
static void message_parse_received_date(const char *hdr, char **hdrp)
{
    char *hdrbuf = NULL;
    char *semi;

    /* Ignore if we already saw one of these headers.
     * We want the date from the first Received header we see.
     */
    if (*hdrp) return;

    /* Copy header to temp buffer */
    message_parse_string(hdr, &hdrbuf);

    /* message_parse_string() is expected to always fill in the
     * buffer; guard anyway rather than dereference NULL. */
    if (!hdrbuf) return;

    /* Find the last ';'.  strrchr() also copes with an empty header
     * and with a ';' in the very first position. */
    semi = strrchr(hdrbuf, ';');

    /* Didn't find ; - fill in hdrp so we don't look at next received header */
    if (!semi) {
        *hdrp = hdrbuf;
        return;
    }

    /* Found it, copy out date string part */
    message_parse_string(semi + 1, hdrp);
    free(hdrbuf);
}
2002 
2003 
2004 /*
2005  * Read a line from @msg into @buf.  Returns a pointer to the start of
2006  * the line inside @buf, or NULL at the end of @msg.
2007  */
message_getline(struct buf * buf,struct msg * msg)2008 static char *message_getline(struct buf *buf, struct msg *msg)
2009 {
2010     unsigned int oldlen = buf_len(buf);
2011     int c;
2012 
2013     while (msg->offset < msg->len) {
2014         c = msg->base[msg->offset++];
2015         buf_putc(buf, c);
2016         if (c == '\n')
2017             break;
2018     }
2019     buf_cstring(buf);
2020 
2021     if (buf_len(buf) == oldlen)
2022         return 0;
2023     return buf->s + oldlen;
2024 }
2025 
2026 
2027 /*
2028  * Return nonzero if s is an enclosing boundary delimiter.
2029  * If we hit a terminating boundary, the integer pointed to by
2030  * 'boundaryct' is modified appropriately.
2031  */
message_pendingboundary(const char * s,int slen,strarray_t * boundaries)2032 static int message_pendingboundary(const char *s, int slen,
2033                                    strarray_t *boundaries)
2034 {
2035     int i, len;
2036     int rfc2046_strict = config_getswitch(IMAPOPT_RFC2046_STRICT);
2037     const char *bbase;
2038     int blen;
2039 
2040     /* skip initial '--' */
2041     if (slen < 2) return 0;
2042     if (s[0] != '-' || s[1] != '-') return 0;
2043     bbase = s + 2;
2044     blen = slen - 2;
2045 
2046     for (i = 0; i < boundaries->count ; ++i) {
2047         len = strlen(boundaries->data[i]);
2048         /* basic sanity check and overflow protection */
2049         if (blen < len) continue;
2050 
2051         if (!strncmp(bbase, boundaries->data[i], len)) {
2052             /* trailing '--', it's the end of this part */
2053             if (blen >= len+2 && bbase[len] == '-' && bbase[len+1] == '-')
2054                 strarray_truncate(boundaries, i);
2055             else if (!rfc2046_strict && blen > len+1 &&
2056                      bbase[len] && !Uisspace(bbase[len])) {
2057                 /* Allow substring matches in the boundary.
2058                  *
2059                  * If rfc2046_strict is enabled, boundaries containing
2060                  * other boundaries as substrings will be treated as identical
2061                  * (per RFC 2046 section 5.1.1).  Note that this will
2062                  * break some messages created by Eudora 5.1 (and earlier).
2063                  */
2064                 continue;
2065             }
2066             return 1;
2067         }
2068     }
2069     return 0;
2070 }
2071 
2072 
2073 /*
2074  * Write the cache information for the message parsed to 'body'
2075  * to 'outfile'.
2076  */
message_write_cache(struct index_record * record,const struct body * body)2077 EXPORTED int message_write_cache(struct index_record *record, const struct body *body)
2078 {
2079     static struct buf cacheitem_buffer;
2080     struct buf ib[NUM_CACHE_FIELDS];
2081     struct body toplevel;
2082     char *subject;
2083     int i;
2084 
2085     /* initialise data structures */
2086     buf_reset(&cacheitem_buffer);
2087     memset(ib, 0, sizeof(ib));
2088 
2089     toplevel.type = "MESSAGE";
2090     toplevel.subtype = "RFC822";
2091     /* we cast away const because we know that we're only using
2092      * toplevel.subpart as const in message_write_section(). */
2093     toplevel.subpart = (struct body *)body;
2094 
2095     subject = charset_parse_mimeheader(body->subject, charset_flags);
2096 
2097     /* copy into bufs */
2098     message_write_envelope(&ib[CACHE_ENVELOPE], body);
2099     message_write_body(&ib[CACHE_BODYSTRUCTURE], body, 1);
2100     buf_copy(&ib[CACHE_HEADERS], &body->cacheheaders);
2101     message_write_body(&ib[CACHE_BODY], body, 0);
2102     message_write_section(&ib[CACHE_SECTION], &toplevel);
2103     message_write_searchaddr(&ib[CACHE_FROM], body->from);
2104     message_write_searchaddr(&ib[CACHE_TO], body->to);
2105     message_write_searchaddr(&ib[CACHE_CC], body->cc);
2106     message_write_searchaddr(&ib[CACHE_BCC], body->bcc);
2107     message_write_nstring(&ib[CACHE_SUBJECT], subject);
2108 
2109     free(subject);
2110 
2111     /* append the records to the buffer */
2112     for (i = 0; i < NUM_CACHE_FIELDS; i++) {
2113         record->crec.item[i].len = buf_len(&ib[i]);
2114         record->crec.item[i].offset = buf_len(&cacheitem_buffer) + sizeof(uint32_t);
2115         message_write_xdrstring(&cacheitem_buffer, &ib[i]);
2116         buf_free(&ib[i]);
2117     }
2118 
2119     /* copy the fields into the message */
2120     record->cache_offset = 0; /* calculate on write! */
2121     record->cache_version = MAILBOX_CACHE_MINOR_VERSION;
2122     record->cache_crc = crc32_buf(&cacheitem_buffer);
2123     record->crec.buf = &cacheitem_buffer;
2124     record->crec.offset = 0; /* we're at the start of the buffer */
2125     record->crec.len = buf_len(&cacheitem_buffer);
2126 
2127     return 0;
2128 }
2129 
2130 
2131 /*
2132  * Write the IMAP envelope for 'body' to 'buf'
2133  */
message_write_envelope(struct buf * buf,const struct body * body)2134 static void message_write_envelope(struct buf *buf, const struct body *body)
2135 {
2136     buf_putc(buf, '(');
2137     message_write_nstring(buf, body->date);
2138     buf_putc(buf, ' ');
2139     message_write_nstring(buf, body->subject);
2140     buf_putc(buf, ' ');
2141     message_write_address(buf, body->from);
2142     buf_putc(buf, ' ');
2143     message_write_address(buf, body->sender ? body->sender : body->from);
2144     buf_putc(buf, ' ');
2145     message_write_address(buf, body->reply_to ? body->reply_to : body->from);
2146     buf_putc(buf, ' ');
2147     message_write_address(buf, body->to);
2148     buf_putc(buf, ' ');
2149     message_write_address(buf, body->cc);
2150     buf_putc(buf, ' ');
2151     message_write_address(buf, body->bcc);
2152     buf_putc(buf, ' ');
2153     message_write_nstring(buf, body->in_reply_to);
2154     buf_putc(buf, ' ');
2155     message_write_nstring(buf, body->message_id);
2156     buf_putc(buf, ')');
2157 }
2158 
2159 /*
2160  * Write the BODY (if 'newformat' is zero) or BODYSTRUCTURE
2161  * (if 'newformat' is nonzero) for 'body' to 'buf'.
2162  */
message_write_body(struct buf * buf,const struct body * body,int newformat)2163 EXPORTED void message_write_body(struct buf *buf, const struct body *body,
2164                                  int newformat)
2165 {
2166     struct param *param;
2167 
2168     if (strcmp(body->type, "MULTIPART") == 0) {
2169         int i;
2170 
2171         /* 0-part multiparts are illegal--convert to 0-len text parts */
2172         if (body->numparts == 0) {
2173             static struct body zerotextbody;
2174 
2175             if (!zerotextbody.type) {
2176                 message_parse_bodytype(DEFAULT_CONTENT_TYPE, &zerotextbody);
2177             }
2178             message_write_body(buf, &zerotextbody, newformat);
2179             return;
2180         }
2181 
2182         /* Multipart types get a body_multipart */
2183         buf_putc(buf, '(');
2184         for (i = 0; i < body->numparts; i++) {
2185             message_write_body(buf, &body->subpart[i], newformat);
2186         }
2187         buf_putc(buf, ' ');
2188         message_write_nstring(buf, body->subtype);
2189 
2190         if (newformat) {
2191             buf_putc(buf, ' ');
2192             if ((param = body->params)!=NULL) {
2193                 buf_putc(buf, '(');
2194                 while (param) {
2195                     message_write_nstring(buf, param->attribute);
2196                     buf_putc(buf, ' ');
2197                     message_write_nstring(buf, param->value);
2198                     if ((param = param->next)!=NULL) {
2199                         buf_putc(buf, ' ');
2200                     }
2201                 }
2202                 buf_putc(buf, ')');
2203             }
2204             else message_write_nstring(buf, (char *)0);
2205             buf_putc(buf, ' ');
2206             if (body->disposition) {
2207                 buf_putc(buf, '(');
2208                 message_write_nstring(buf, body->disposition);
2209                 buf_putc(buf, ' ');
2210                 if ((param = body->disposition_params)!=NULL) {
2211                     buf_putc(buf, '(');
2212                     while (param) {
2213                         message_write_nstring(buf, param->attribute);
2214                         buf_putc(buf, ' ');
2215                         message_write_nstring(buf, param->value);
2216                         if ((param = param->next)!=NULL) {
2217                             buf_putc(buf, ' ');
2218                         }
2219                     }
2220                     buf_putc(buf, ')');
2221                 }
2222                 else message_write_nstring(buf, (char *)0);
2223                 buf_putc(buf, ')');
2224             }
2225             else {
2226                 message_write_nstring(buf, (char *)0);
2227             }
2228             buf_putc(buf, ' ');
2229             if ((param = body->language)!=NULL) {
2230                 buf_putc(buf, '(');
2231                 while (param) {
2232                     message_write_nstring(buf, param->value);
2233                     if ((param = param->next)!=NULL) {
2234                         buf_putc(buf, ' ');
2235                     }
2236                 }
2237                 buf_putc(buf, ')');
2238             }
2239             else message_write_nstring(buf, (char *)0);
2240             buf_putc(buf, ' ');
2241             message_write_nstring(buf, body->location);
2242         }
2243 
2244         buf_putc(buf, ')');
2245         return;
2246     }
2247 
2248     buf_putc(buf, '(');
2249     message_write_nstring(buf, body->type);
2250     buf_putc(buf, ' ');
2251     message_write_nstring(buf, body->subtype);
2252     buf_putc(buf, ' ');
2253 
2254     if ((param = body->params)!=NULL) {
2255         buf_putc(buf, '(');
2256         while (param) {
2257             message_write_nstring(buf, param->attribute);
2258             buf_putc(buf, ' ');
2259             message_write_nstring(buf, param->value);
2260             if ((param = param->next)!=NULL) {
2261                 buf_putc(buf, ' ');
2262             }
2263         }
2264         buf_putc(buf, ')');
2265     }
2266     else message_write_nstring(buf, (char *)0);
2267     buf_putc(buf, ' ');
2268 
2269     message_write_nstring(buf, body->id);
2270     buf_putc(buf, ' ');
2271     message_write_nstring(buf, body->description);
2272     buf_putc(buf, ' ');
2273     message_write_nstring(buf, body->encoding ? body->encoding : "7BIT");
2274     buf_putc(buf, ' ');
2275     buf_printf(buf, "%u", body->content_size);
2276 
2277     if (strcmp(body->type, "TEXT") == 0) {
2278         /* Text types get a line count */
2279         buf_putc(buf, ' ');
2280         buf_printf(buf, "%u", body->content_lines);
2281     }
2282     else if (strcmp(body->type, "MESSAGE") == 0
2283              && strcmp(body->subtype, "RFC822") == 0) {
2284         /* Message/rfc822 gets a body_msg */
2285         buf_putc(buf, ' ');
2286         message_write_envelope(buf, body->subpart);
2287         buf_putc(buf, ' ');
2288         message_write_body(buf, body->subpart, newformat);
2289         buf_putc(buf, ' ');
2290         buf_printf(buf, "%u", body->content_lines);
2291     }
2292 
2293     if (newformat) {
2294         /* Add additional fields for BODYSTRUCTURE */
2295         buf_putc(buf, ' ');
2296         message_write_nstring(buf, body->md5);
2297         buf_putc(buf, ' ');
2298         if (body->disposition) {
2299             buf_putc(buf, '(');
2300             message_write_nstring(buf, body->disposition);
2301             buf_putc(buf, ' ');
2302             if ((param = body->disposition_params)!=NULL) {
2303                 buf_putc(buf, '(');
2304                 while (param) {
2305                     message_write_nstring(buf, param->attribute);
2306                     buf_putc(buf, ' ');
2307                     message_write_nstring(buf, param->value);
2308                     if ((param = param->next)!=NULL) {
2309                         buf_putc(buf, ' ');
2310                     }
2311                 }
2312                 buf_putc(buf, ')');
2313             }
2314             else message_write_nstring(buf, (char *)0);
2315             buf_putc(buf, ')');
2316         }
2317         else {
2318             message_write_nstring(buf, (char *)0);
2319         }
2320         buf_putc(buf, ' ');
2321         if ((param = body->language)!=NULL) {
2322             buf_putc(buf, '(');
2323             while (param) {
2324                 message_write_nstring(buf, param->value);
2325                 if ((param = param->next)!=NULL) {
2326                     buf_putc(buf, ' ');
2327                 }
2328             }
2329             buf_putc(buf, ')');
2330         }
2331         else message_write_nstring(buf, (char *)0);
2332         buf_putc(buf, ' ');
2333         message_write_nstring(buf, body->location);
2334 
2335         if (newformat > 1 && !body->numparts) {
2336             /* even newer extension fields for annotation callout */
2337             buf_printf(buf, " (OFFSET %u HEADERSIZE %u)",
2338                        body->content_offset,
2339                        body->header_size);
2340         }
2341     }
2342 
2343     buf_putc(buf, ')');
2344 }
2345 
2346 /*
2347  * Write the address list 'addrlist' to 'buf'
2348  */
message_write_address(struct buf * buf,const struct address * addrlist)2349 static void message_write_address(struct buf *buf,
2350                                   const struct address *addrlist)
2351 {
2352     /* If no addresses, write out NIL */
2353     if (!addrlist) {
2354         message_write_nstring(buf, (char *)0);
2355         return;
2356     }
2357 
2358     buf_putc(buf, '(');
2359 
2360     while (addrlist) {
2361         buf_putc(buf, '(');
2362         message_write_nstring(buf, addrlist->name);
2363         buf_putc(buf, ' ');
2364         message_write_nstring(buf, addrlist->route);
2365         buf_putc(buf, ' ');
2366         message_write_nstring(buf, addrlist->mailbox);
2367         buf_putc(buf, ' ');
2368         message_write_nstring(buf, addrlist->domain);
2369         buf_putc(buf, ')');
2370         addrlist = addrlist->next;
2371     }
2372 
2373     buf_putc(buf, ')');
2374 }
2375 
2376 /*
2377  * Write the nil-or-string 's' to 'buf'
2378  */
message_write_nstring(struct buf * buf,const char * s)2379 EXPORTED void message_write_nstring(struct buf *buf, const char *s)
2380 {
2381     message_write_nstring_map(buf, s, (s ? strlen(s) : 0));
2382 }
2383 
message_write_nstring_map(struct buf * buf,const char * s,unsigned int len)2384 EXPORTED void message_write_nstring_map(struct buf *buf,
2385                                const char *s,
2386                                unsigned int len)
2387 {
2388     const char *p;
2389     int is_literal = 0;
2390 
2391     /* Write null pointer as NIL */
2392     if (!s) {
2393         buf_appendcstr(buf, "NIL");
2394         return;
2395     }
2396 
2397     if (len >= 1024)
2398     {
2399         is_literal = 1;
2400     }
2401     else
2402     {
2403         /* Look for any non-QCHAR characters */
2404         for (p = s; (unsigned)(p-s) < len ; p++) {
2405             if (!*p || *p & 0x80 || *p == '\r' || *p == '\n'
2406                 || *p == '\"' || *p == '%' || *p == '\\') {
2407                 is_literal = 1;
2408                 break;
2409             }
2410         }
2411     }
2412 
2413     if (is_literal) {
2414         /* Write out as literal */
2415         buf_printf(buf, "{%u}\r\n", len);
2416         buf_appendmap(buf, s, len);
2417     }
2418     else {
2419         /* Write out as quoted string */
2420         buf_putc(buf, '"');
2421         buf_appendmap(buf, s, len);
2422         buf_putc(buf, '"');
2423     }
2424 }
2425 
2426 /*
2427  * Append the string @s to the buffer @buf in a binary
2428  * format almost exactly
2429  */
message_write_xdrstring(struct buf * buf,const struct buf * s)2430 EXPORTED void message_write_xdrstring(struct buf *buf, const struct buf *s)
2431 {
2432     unsigned padlen;
2433 
2434     /* 32b string length in network order */
2435     buf_appendbit32(buf, buf_len(s));
2436     /* bytes of string */
2437     buf_appendmap(buf, s->s, s->len);
2438     /* 0 to 3 bytes padding */
2439     padlen = (4 - (s->len & 3)) & 3;
2440     buf_appendmap(buf, "\0\0\0", padlen);
2441 }
2442 
/*
 * Write the text 's' to 'buf', converting to lower case as we go.
 * 's' must be NUL-terminated; the terminator itself is not written.
 */
static void message_write_text_lcase(struct buf *buf, const char *s)
{
    while (*s) {
        buf_putc(buf, TOLOWER(*s));
        s++;
    }
}
2452 
message_write_nocharset(struct buf * buf,const struct body * body)2453 static void message_write_nocharset(struct buf *buf, const struct body *body)
2454 {
2455     buf_appendbit32(buf, 0x0000ffff);
2456 
2457     char guidbuf[MESSAGE_GUID_SIZE];
2458     if (body) message_guid_export(&body->content_guid, guidbuf);
2459     else memset(&guidbuf, 0, MESSAGE_GUID_SIZE);
2460     buf_appendmap(buf, guidbuf, MESSAGE_GUID_SIZE);
2461     buf_appendbit32(buf, body ? body->decoded_content_size : 0);
2462     buf_appendbit32(buf, body ? body->content_lines : 0);
2463 }
2464 
2465 /*
2466  * Write out the FETCH BODY[section] location/size information to 'buf'.
2467  */
message_write_section(struct buf * buf,const struct body * body)2468 static void message_write_section(struct buf *buf, const struct body *body)
2469 {
2470     int part;
2471 
2472     if (strcmp(body->type, "MESSAGE") == 0
2473         && strcmp(body->subtype, "RFC822") == 0) {
2474         if (body->subpart->numparts) {
2475             /*
2476              * Part 0 of a message/rfc822 is the message header/text.
2477              * Nested parts of a message/rfc822 containing a multipart
2478              * are the sub-parts of the multipart.
2479              */
2480             buf_appendbit32(buf, body->subpart->numparts+1);
2481             buf_appendbit32(buf, body->subpart->header_offset);
2482             buf_appendbit32(buf, body->subpart->header_size);
2483             buf_appendbit32(buf, body->subpart->content_offset);
2484             buf_appendbit32(buf, body->subpart->content_size);
2485             message_write_nocharset(buf, body->subpart);
2486             for (part = 0; part < body->subpart->numparts; part++) {
2487                 buf_appendbit32(buf, body->subpart->subpart[part].header_offset);
2488                 buf_appendbit32(buf, body->subpart->subpart[part].header_size);
2489                 buf_appendbit32(buf, body->subpart->subpart[part].content_offset);
2490                 if (body->subpart->subpart[part].numparts == 0 &&
2491                     strcmp(body->subpart->subpart[part].type, "MULTIPART") == 0) {
2492                     /* Treat 0-part multipart as 0-length text */
2493                     buf_appendbit32(buf, 0);
2494                 }
2495                 else {
2496                     buf_appendbit32(buf, body->subpart->subpart[part].content_size);
2497                 }
2498                 message_write_charset(buf, &body->subpart->subpart[part]);
2499             }
2500             for (part = 0; part < body->subpart->numparts; part++) {
2501                 message_write_section(buf, &body->subpart->subpart[part]);
2502             }
2503         }
2504         else {
2505             /*
2506              * Part 0 of a message/rfc822 is the message header/text.
2507              * Part 1 of a message/rfc822 containing a non-multipart
2508              * is the message body.
2509              */
2510             buf_appendbit32(buf, 2);
2511             buf_appendbit32(buf, body->subpart->header_offset);
2512             buf_appendbit32(buf, body->subpart->header_size);
2513             buf_appendbit32(buf, body->subpart->content_offset);
2514             buf_appendbit32(buf, body->subpart->content_size);
2515             message_write_nocharset(buf, body->subpart);
2516             buf_appendbit32(buf, body->subpart->header_offset);
2517             buf_appendbit32(buf, body->subpart->header_size);
2518             buf_appendbit32(buf, body->subpart->content_offset);
2519             if (strcmp(body->subpart->type, "MULTIPART") == 0) {
2520                 /* Treat 0-part multipart as 0-length text */
2521                 buf_appendbit32(buf, 0);
2522                 message_write_nocharset(buf, NULL);
2523             }
2524             else {
2525                 buf_appendbit32(buf, body->subpart->content_size);
2526                 message_write_charset(buf, body->subpart);
2527             }
2528             message_write_section(buf, body->subpart);
2529         }
2530     }
2531     else if (body->numparts) {
2532         /*
2533          * Cannot fetch part 0 of a multipart.
2534          * Nested parts of a multipart are the sub-parts.
2535          */
2536         buf_appendbit32(buf, body->numparts+1);
2537         buf_appendbit32(buf, 0);
2538         buf_appendbit32(buf, -1);
2539         buf_appendbit32(buf, 0);
2540         buf_appendbit32(buf, -1);
2541         message_write_nocharset(buf, NULL);
2542         for (part = 0; part < body->numparts; part++) {
2543             buf_appendbit32(buf, body->subpart[part].header_offset);
2544             buf_appendbit32(buf, body->subpart[part].header_size);
2545             buf_appendbit32(buf, body->subpart[part].content_offset);
2546             if (body->subpart[part].numparts == 0 &&
2547                 strcmp(body->subpart[part].type, "MULTIPART") == 0) {
2548                 /* Treat 0-part multipart as 0-length text */
2549                 buf_appendbit32(buf, 0);
2550                 message_write_nocharset(buf, &body->subpart[part]);
2551             }
2552             else {
2553                 buf_appendbit32(buf, body->subpart[part].content_size);
2554                 message_write_charset(buf, &body->subpart[part]);
2555             }
2556         }
2557         for (part = 0; part < body->numparts; part++) {
2558             message_write_section(buf, &body->subpart[part]);
2559         }
2560     }
2561     else {
2562         /*
2563          * Leaf section--no part 0 or nested parts
2564          */
2565         buf_appendbit32(buf, 0);
2566     }
2567 }
2568 
2569 /*
2570  * Write the 32-bit charset/encoding value and the charset identifier
2571  * for section 'body' to 'buf'
2572  */
message_write_charset(struct buf * buf,const struct body * body)2573 static void message_write_charset(struct buf *buf, const struct body *body)
2574 {
2575     int encoding;
2576     charset_t charset;
2577     size_t len = 0;
2578     const char *name = NULL;
2579 
2580     message_parse_charset(body, &encoding, &charset);
2581 
2582     /* write charset/encoding preamble */
2583     if (charset != CHARSET_UNKNOWN_CHARSET) {
2584         size_t itemsize;
2585 
2586         name = charset_alias_name(charset);
2587         len = strlen(name);
2588 
2589         /* charset name length is a multiple of cache item size,
2590          * including the terminating zero byte(s) */
2591         itemsize = (size_t) CACHE_ITEM_SIZE_SKIP;
2592         len = ((len / itemsize) + 1) * itemsize;
2593         if (len > 0xffff) len = 0;
2594     }
2595     /* we stored 0x100 here to say that it was a version 4 cache with the
2596      * charset length stored, which is all very well and nice, but it's
2597      * useless once we added sha1, so it's been removed again */
2598     buf_appendbit32(buf, ((len & 0xffff) << 16)|(encoding & 0xff));
2599 
2600     /* write charset identifier */
2601     if (len) {
2602         char *tmp = (char*) xcalloc(sizeof(char), len);
2603         memcpy(tmp, name, strlen(name));
2604         buf_appendmap(buf, tmp, len);
2605         free(tmp);
2606     }
2607     charset_free(&charset);
2608 
2609     /* NOTE - this stuff doesn't really belong in a method called
2610      * message_write_charset, but it's the fields that are always
2611      * written immediately after the charset! */
2612     char guidbuf[MESSAGE_GUID_SIZE];
2613     if (body) message_guid_export(&body->content_guid, guidbuf);
2614     else memset(&guidbuf, 0, MESSAGE_GUID_SIZE);
2615     buf_appendmap(buf, guidbuf, MESSAGE_GUID_SIZE);
2616     buf_appendbit32(buf, body ? body->decoded_content_size : 0);
2617     buf_appendbit32(buf, body ? body->content_lines : 0);
2618 }
2619 
2620 /*
2621  * Unparse the address list 'addrlist' to 'buf'
2622  */
message_write_searchaddr(struct buf * buf,const struct address * addrlist)2623 static void message_write_searchaddr(struct buf *buf,
2624                                      const struct address *addrlist)
2625 {
2626     int prevaddr = 0;
2627     char* tmp;
2628 
2629     while (addrlist) {
2630 
2631         /* Handle RFC 822 group addresses */
2632         if (!addrlist->domain) {
2633             if (addrlist->mailbox) {
2634                 if (prevaddr) buf_putc(buf, ',');
2635 
2636                 tmp = charset_parse_mimeheader(addrlist->mailbox, charset_flags);
2637                 buf_appendcstr(buf, tmp);
2638                 free(tmp);
2639                 tmp = NULL;
2640                 buf_putc(buf, ':');
2641 
2642                 /* Suppress a trailing comma */
2643                 prevaddr = 0;
2644             }
2645             else {
2646                 buf_putc(buf, ';');
2647                 prevaddr = 1;
2648             }
2649         }
2650         else {
2651             if (prevaddr) buf_putc(buf, ',');
2652 
2653             if (addrlist->name) {
2654                 tmp = charset_parse_mimeheader(addrlist->name, charset_flags);
2655                 buf_appendcstr(buf, tmp);
2656                 free(tmp); tmp = NULL;
2657                 buf_putc(buf, ' ');
2658             }
2659 
2660             buf_putc(buf, '<');
2661             if (addrlist->route) {
2662                 message_write_text_lcase(buf, addrlist->route);
2663                 buf_putc(buf, ':');
2664             }
2665 
2666             message_write_text_lcase(buf, addrlist->mailbox);
2667             buf_putc(buf, '@');
2668 
2669             message_write_text_lcase(buf, addrlist->domain);
2670             buf_putc(buf, '>');
2671             prevaddr = 1;
2672         }
2673 
2674         addrlist = addrlist->next;
2675     }
2676 }
2677 
param_free(struct param ** paramp)2678 EXPORTED void param_free(struct param **paramp)
2679 {
2680     struct param *param, *nextparam;
2681 
2682     param = *paramp;
2683     *paramp = NULL;
2684 
2685     for (; param; param = nextparam) {
2686         nextparam = param->next;
2687         if (param->attribute) free(param->attribute);
2688         if (param->value) free(param->value);
2689         free(param);
2690     }
2691 }
2692 
2693 /*
2694  * Free the parsed body-part 'body'
2695  */
message_free_body(struct body * body)2696 EXPORTED void message_free_body(struct body *body)
2697 {
2698     int part;
2699 
2700     if (!body) return;
2701 
2702     if (body->type) {
2703         free(body->type);
2704         free(body->subtype);
2705         param_free(&body->params);
2706     }
2707     if (body->id) free(body->id);
2708     if (body->description) free(body->description);
2709     if (body->encoding) free(body->encoding);
2710     if (body->md5) free(body->md5);
2711     if (body->disposition) {
2712         free(body->disposition);
2713         param_free(&body->disposition_params);
2714     }
2715     param_free(&body->language);
2716     if (body->location) free(body->location);
2717     if (body->date) free(body->date);
2718     if (body->subject) free(body->subject);
2719     if (body->from) parseaddr_free(body->from);
2720     if (body->sender) parseaddr_free(body->sender);
2721     if (body->reply_to) parseaddr_free(body->reply_to);
2722     if (body->to) parseaddr_free(body->to);
2723     if (body->cc) parseaddr_free(body->cc);
2724     if (body->bcc) parseaddr_free(body->bcc);
2725     if (body->in_reply_to) free(body->in_reply_to);
2726     if (body->message_id) free(body->message_id);
2727     if (body->x_me_message_id) free(body->x_me_message_id);
2728     if (body->references) free(body->references);
2729     if (body->received_date) free(body->received_date);
2730     if (body->x_deliveredinternaldate) free(body->x_deliveredinternaldate);
2731     if (body->charset_id) free(body->charset_id);
2732     if (body->part_id) free(body->part_id);
2733 
2734     if (body->subpart) {
2735         if (body->numparts) {
2736             for (part=0; part < body->numparts; part++) {
2737                 message_free_body(&body->subpart[part]);
2738             }
2739         }
2740         else {
2741             message_free_body(body->subpart);
2742         }
2743         free(body->subpart);
2744     }
2745 
2746     buf_free(&body->cacheheaders);
2747 
2748     if (body->decoded_body) free(body->decoded_body);
2749 }
2750 
2751 /*
2752  * Parse a cached envelope into individual tokens
2753  *
2754  * When inside a list (ncom > 0), we parse the individual tokens but don't
2755  * isolate them -- we return the entire list as a single token.
2756  */
parse_cached_envelope(char * env,char * tokens[],int tokens_size)2757 HIDDEN void parse_cached_envelope(char *env, char *tokens[], int tokens_size)
2758 {
2759     char *c;
2760     int i = 0, ncom = 0, len;
2761 
2762     /*
2763      * We have no way of indicating that we parsed less than
2764      * the requested number of tokens, but we can at least
2765      * ensure that the array is correctly initialised to NULL.
2766      */
2767     memset(tokens, 0, tokens_size*sizeof(char*));
2768 
2769     c = env;
2770     while (*c != '\0') {
2771         switch (*c) {
2772         case ' ':                       /* end of token */
2773             if (!ncom) *c = '\0';       /* mark end of token */
2774             c++;
2775             break;
2776         case 'N':                       /* "NIL" */
2777         case 'n':
2778             if (!ncom) {
2779                 if(i>=tokens_size) break;
2780                 tokens[i++] = NULL;     /* empty token */
2781             }
2782             c += 3;                     /* skip "NIL" */
2783             break;
2784         case '"':                       /* quoted string */
2785             c++;                        /* skip open quote */
2786             if (!ncom) {
2787                 if(i>=tokens_size) break;
2788                 tokens[i++] = c;        /* start of string */
2789             }
2790             while (*c && *c != '"') {           /* find close quote */
2791                 if (*c == '\\') c++;    /* skip quoted-specials */
2792                 if (*c) c++;
2793             }
2794             if (*c) {
2795                 if (!ncom) *c = '\0';   /* end of string */
2796                 c++;                    /* skip close quote */
2797             }
2798             break;
2799         case '{':                       /* literal */
2800             c++;                        /* skip open brace */
2801             len = 0;                    /* determine length of literal */
2802             while (cyrus_isdigit((int) *c)) {
2803                 len = len*10 + *c - '0';
2804                 c++;
2805             }
2806             c += 3;                     /* skip close brace & CRLF */
2807             if (!ncom){
2808                 if(i>=tokens_size) break;
2809                 tokens[i++] = c;        /* start of literal */
2810             }
2811             c += len;                   /* skip literal */
2812             break;
2813         case '(':                       /* start of address */
2814             c++;                        /* skip open paren */
2815             if (!ncom) {
2816                 if(i>=tokens_size) break;
2817                 tokens[i++] = c;        /* start of address list */
2818             }
2819             ncom++;                     /* new open - inc counter */
2820             break;
2821         case ')':                       /* end of address */
2822             c++;                        /* skip close paren */
2823             if (ncom) {                 /* paranoia */
2824                 ncom--;                 /* close - dec counter */
2825                 if (!ncom)              /* all open paren are closed */
2826                     *(c-1) = '\0';      /* end of list - trim close paren */
2827             }
2828             break;
2829         default:
2830             /* yikes! unparsed junk, just skip it */
2831             c++;
2832             break;
2833         }
2834     }
2835 }
2836 
parse_nstring(char ** str)2837 EXPORTED char *parse_nstring(char **str)
2838 {
2839     char *cp = *str, *val;
2840 
2841     if (*cp == '"') { /* quoted string */
2842         val = cp+1; /* skip " */
2843         do {
2844             cp = strchr(cp+1, '"');
2845             if (!cp) return NULL; /* whole thing is broken */
2846         } while (*(cp-1) == '\\'); /* skip escaped " */
2847         *cp++ = '\0';
2848     }
2849     else if (*cp == '{') {
2850         int len = 0;
2851         /* yeah, it may be a literal too */
2852         cp++;
2853         while (cyrus_isdigit((int) *cp)) {
2854             len = len*10 + *cp - '0';
2855             cp++;
2856         }
2857         cp += 3;                /* skip close brace & CRLF */
2858         val = cp;
2859         val[len] = '\0';
2860         cp += len;
2861     }
2862     else { /* NIL */
2863         val = NULL;
2864         cp += 3;
2865     }
2866 
2867     *str = cp;
2868     return val;
2869 }
2870 
message_parse_env_address(char * str,struct address * addr)2871 EXPORTED void message_parse_env_address(char *str, struct address *addr)
2872 {
2873     if (*str == '(') str++; /* skip ( */
2874     addr->name = parse_nstring(&str);
2875     str++; /* skip SP */
2876     addr->route = parse_nstring(&str);
2877     str++; /* skip SP */
2878     addr->mailbox = parse_nstring(&str);
2879     str++; /* skip SP */
2880     addr->domain = parse_nstring(&str);
2881 }
2882 
2883 /*
2884  * Read an nstring from cached bodystructure.
2885  * Analog to message_write_nstring().
2886  * If 'copy' is set, returns a freshly allocated copy of the string,
2887  * otherwise is returns a pointer to the string which will be overwritten
2888  * on the next call to message_read_nstring()
2889  */
message_read_nstring(struct protstream * strm,char ** str,int copy)2890 static int message_read_nstring(struct protstream *strm, char **str, int copy)
2891 {
2892     static struct buf buf = BUF_INITIALIZER;
2893     int c;
2894 
2895     c = getnstring(strm, NULL, &buf);
2896 
2897     if (str) {
2898         if (!buf.s) *str = NULL;
2899         else if (copy) *str = xstrdup(buf.s);
2900         else *str = buf.s;
2901     }
2902 
2903     return c;
2904 }
2905 
2906 /*
2907  * Read a parameter list from cached bodystructure.
2908  * If withattr is set, attribute/value pairs will be read,
2909  * otherwise, just values are read.
2910  */
message_read_params(struct protstream * strm,struct param ** paramp,int withattr)2911 static int message_read_params(struct protstream *strm, struct param **paramp,
2912                                int withattr)
2913 {
2914     int c;
2915 
2916     if ((c = prot_getc(strm)) == '(') {
2917         /* parse list */
2918         struct param *param;
2919 
2920         do {
2921             *paramp = param = (struct param *) xzmalloc(sizeof(struct param));
2922 
2923             if (withattr) {
2924                 /* attribute */
2925                 c = message_read_nstring(strm, &param->attribute, 1);
2926             }
2927 
2928             /* value */
2929             c = message_read_nstring(strm, &param->value, 1);
2930 
2931             /* get ready to append the next parameter */
2932             paramp = &param->next;
2933 
2934         } while (c == ' ');
2935 
2936         if (c == ')') c = prot_getc(strm);
2937     }
2938     else {
2939         /* NIL */
2940         prot_ungetc(c, strm);
2941         c = message_read_nstring(strm, NULL, 0);
2942     }
2943 
2944     return c;
2945 }
2946 
2947 /*
2948  * Read an address part from cached bodystructure.
2949  * The string is appended to 'buf' (including NUL).
2950  */
message_read_addrpart(struct protstream * strm,const char ** part,unsigned * off,struct buf * buf)2951 static int message_read_addrpart(struct protstream *strm,
2952                                  const char **part, unsigned *off, struct buf *buf)
2953 {
2954     int c;
2955 
2956     c = message_read_nstring(strm, (char **)part, 0);
2957     if (*part) {
2958         *off = buf->len;
2959         buf_appendmap(buf, *part, strlen(*part)+1);
2960     }
2961 
2962     return c;
2963 }
2964 
2965 /*
2966  * Read an address list from cached bodystructure.
2967  * Analog to message_write_address()
2968  */
message_read_address(struct protstream * strm,struct address ** addrp)2969 static int message_read_address(struct protstream *strm, struct address **addrp)
2970 {
2971     int c;
2972 
2973     if ((c = prot_getc(strm)) == '(') {
2974         /* parse list */
2975         struct address *addr;
2976         unsigned nameoff = 0, rtoff = 0, mboxoff = 0, domoff = 0;
2977 
2978         do {
2979             struct buf buf = BUF_INITIALIZER;
2980             *addrp = addr = (struct address *) xzmalloc(sizeof(struct address));
2981 
2982             /* opening '(' */
2983             c = prot_getc(strm);
2984 
2985             /* name */
2986             c = message_read_addrpart(strm, &addr->name, &nameoff, &buf);
2987 
2988             /* route */
2989             c = message_read_addrpart(strm, &addr->route, &rtoff, &buf);
2990 
2991             /* mailbox */
2992             c = message_read_addrpart(strm, &addr->mailbox, &mboxoff, &buf);
2993 
2994             /* host */
2995             c = message_read_addrpart(strm, &addr->domain, &domoff, &buf);
2996 
2997             /* addr parts must now point into our freeme string */
2998             if (buf.len) {
2999                 char *freeme = addr->freeme = buf_release(&buf);
3000 
3001                 if (addr->name) addr->name = freeme+nameoff;
3002                 if (addr->route) addr->route = freeme+rtoff;
3003                 if (addr->mailbox) addr->mailbox = freeme+mboxoff;
3004                 if (addr->domain) addr->domain = freeme+domoff;
3005             }
3006 
3007             buf_free(&buf);
3008 
3009             /* get ready to append the next address */
3010             addrp = &addr->next;
3011 
3012         } while (((c = prot_getc(strm)) == '(') && prot_ungetc(c, strm));
3013 
3014         if (c == ')') c = prot_getc(strm);
3015     }
3016     else {
3017         /* NIL */
3018         prot_ungetc(c, strm);
3019         c = message_read_nstring(strm, NULL, 0);
3020     }
3021 
3022     return c;
3023 }
3024 
3025 /*
3026  * Read a cached envelope response.
3027  * Analog to message_write_envelope()
3028  */
message_read_envelope(struct protstream * strm,struct body * body)3029 static int message_read_envelope(struct protstream *strm, struct body *body)
3030 {
3031     int c;
3032 
3033     /* opening '(' */
3034     c = prot_getc(strm);
3035 
3036     /* date */
3037     c = message_read_nstring(strm, &body->date, 1);
3038 
3039     /* subject */
3040     c = message_read_nstring(strm, &body->subject, 1);
3041 
3042     /* from */
3043     c = message_read_address(strm, &body->from);
3044 
3045     /* sender */
3046     c = message_read_address(strm, &body->sender);
3047 
3048     /* reply-to */
3049     c = message_read_address(strm, &body->reply_to);
3050 
3051     /* to */
3052     c = message_read_address(strm, &body->to);
3053 
3054     /* cc */
3055     c = message_read_address(strm, &body->cc);
3056 
3057     /* bcc */
3058     c = message_read_address(strm, &body->bcc);
3059 
3060     /* in-reply-to */
3061     c = message_read_nstring(strm, &body->in_reply_to, 1);
3062 
3063     /* message-id */
3064     c = message_read_nstring(strm, &body->message_id, 1);
3065 
3066     if (c == ')') c = prot_getc(strm);
3067 
3068     return c;
3069 }
3070 
3071 /*
3072  * Read cached bodystructure response.
3073  * Analog to message_write_body()
3074  */
message_read_body(struct protstream * strm,struct body * body,const char * part_id)3075 static int message_read_body(struct protstream *strm, struct body *body, const char *part_id)
3076 {
3077     int c;
3078     struct buf buf = BUF_INITIALIZER;
3079 
3080     /* opening '(' */
3081     c = prot_getc(strm);
3082     if (c == EOF) goto done;
3083 
3084     /* check for multipart */
3085     if ((c = prot_peek(strm)) == '(') {
3086 
3087         body->type = xstrdup("MULTIPART");
3088         do {
3089             body->subpart =
3090                 (struct body *)xrealloc((char *)body->subpart,
3091                                         (body->numparts+1)*sizeof(struct body));
3092             memset(&body->subpart[body->numparts], 0, sizeof(struct body));
3093             buf_reset(&buf);
3094             if (part_id) buf_printf(&buf, "%s.", part_id);
3095             buf_printf(&buf, "%d", body->numparts + 1);
3096             struct body *subbody = &body->subpart[body->numparts++];
3097             subbody->part_id = buf_release(&buf);
3098             c = message_read_body(strm, subbody, subbody->part_id);
3099         } while (((c = prot_getc(strm)) == '(') && prot_ungetc(c, strm));
3100 
3101         /* remove the part_id here, you can't address multiparts directly */
3102         free(body->part_id);
3103         body->part_id = NULL;
3104 
3105         /* body subtype */
3106         c = message_read_nstring(strm, &body->subtype, 1);
3107         if (c == EOF) goto done;
3108 
3109         /* extension data */
3110 
3111         /* body parameters */
3112         c = message_read_params(strm, &body->params, 1);
3113         if (c == EOF) goto done;
3114     }
3115     else {
3116         if (!body->part_id) {
3117             buf_reset(&buf);
3118             if (part_id) buf_printf(&buf, "%s.", part_id);
3119             buf_printf(&buf, "%d", 1);
3120             body->part_id = buf_release(&buf);
3121         }
3122         /* non-multipart */
3123 
3124         /* body type */
3125         c = message_read_nstring(strm, &body->type, 1);
3126         if (c == EOF) goto done;
3127 
3128         /* body subtype */
3129         c = message_read_nstring(strm, &body->subtype, 1);
3130         if (c == EOF) goto done;
3131 
3132         /* body parameters */
3133         c = message_read_params(strm, &body->params, 1);
3134         if (c == EOF) goto done;
3135 
3136         /* body id */
3137         c = message_read_nstring(strm, &body->id, 1);
3138         if (c == EOF) goto done;
3139 
3140         /* body description */
3141         c = message_read_nstring(strm, &body->description, 1);
3142         if (c == EOF) goto done;
3143 
3144         /* body encoding */
3145         c = message_read_nstring(strm, &body->encoding, 1);
3146         if (c == EOF) goto done;
3147 
3148         /* body size */
3149         c = getuint32(strm, &body->content_size);
3150         if (c == EOF) goto done;
3151 
3152         if (!strcmp(body->type, "TEXT")) {
3153             /* body lines */
3154             c = getint32(strm, (int32_t *) &body->content_lines);
3155             if (c == EOF) goto done;
3156         }
3157         else if (!strcmp(body->type, "MESSAGE") &&
3158                  !strcmp(body->subtype, "RFC822")) {
3159 
3160             body->subpart = (struct body *) xzmalloc(sizeof(struct body));
3161 
3162             /* envelope structure */
3163             c = message_read_envelope(strm, body->subpart);
3164             if (c == EOF) goto done;
3165 
3166             /* body structure */
3167             c = message_read_body(strm, body->subpart, body->part_id);
3168             if (c == EOF) goto done;
3169             c = prot_getc(strm); /* trailing SP */
3170             if (c == EOF) goto done;
3171 
3172             /* body lines */
3173             c = getint32(strm, (int32_t *) &body->content_lines);
3174             if (c == EOF) goto done;
3175         }
3176 
3177         /* extension data */
3178 
3179         /* body MD5 */
3180         c = message_read_nstring(strm, &body->md5, 1);
3181         if (c == EOF) goto done;
3182     }
3183 
3184     /* common extension data */
3185 
3186     /* body disposition */
3187     if ((c = prot_getc(strm)) == '(') {
3188         c = message_read_nstring(strm, &body->disposition, 1);
3189         if (c == EOF) goto done;
3190 
3191         c = message_read_params(strm, &body->disposition_params, 1);
3192         if (c == ')') c = prot_getc(strm); /* trailing SP */
3193         if (c == EOF) goto done;
3194     }
3195     else {
3196         /* NIL */
3197         prot_ungetc(c, strm);
3198         c = message_read_nstring(strm, &body->disposition, 1);
3199         if (c == EOF) goto done;
3200     }
3201 
3202     /* body language */
3203     if ((c = prot_peek(strm)) == '(') {
3204         c = message_read_params(strm, &body->language, 0);
3205         if (c == EOF) goto done;
3206     }
3207     else {
3208         char *lang;
3209 
3210         c = message_read_nstring(strm, &lang, 1);
3211         if (c == EOF) goto done;
3212         if (lang) {
3213             body->language = (struct param *) xzmalloc(sizeof(struct param));
3214             body->language->value = lang;
3215         }
3216     }
3217 
3218     /* body location */
3219     c = message_read_nstring(strm, &body->location, 1);
3220 
3221     /* XXX  We currently don't store any other extension data.
3222             MUST keep in sync with message_write_body() */
3223 
3224 done:
3225     buf_free(&buf);
3226     return c;
3227 }
3228 
3229 /*
3230  * Read cached binary bodystructure.
3231  * Analog to message_write_section()
3232  */
message_read_binarybody(struct body * body,const char ** sect,uint32_t cache_version)3233 static void message_read_binarybody(struct body *body, const char **sect,
3234                                     uint32_t cache_version)
3235 {
3236     bit32 n, i;
3237     const char *p = *sect;
3238     struct body *subpart;
3239     size_t len;
3240     uint32_t cte;
3241 
3242     n = CACHE_ITEM_BIT32(*sect);
3243     p = *sect += CACHE_ITEM_SIZE_SKIP;
3244     if (!n) return;
3245 
3246     if (!strcmp(body->type, "MESSAGE") && !strcmp(body->subtype, "RFC822") &&
3247         body->subpart->numparts) {
3248         subpart = body->subpart->subpart;
3249         body = body->subpart;
3250     }
3251     else {
3252         /* If a message/rfc822 contains a non-multipart,
3253            we don't care about part 0 (message header) */
3254         subpart = body->subpart;
3255         body = NULL;
3256     }
3257 
3258     if (!body) {
3259         /* skip header part */
3260         p += 5 * CACHE_ITEM_SIZE_SKIP;
3261         if (cache_version >= 5)
3262             p += MESSAGE_GUID_SIZE;
3263         if (cache_version >= 8)
3264             p += CACHE_ITEM_SIZE_SKIP;
3265         if (cache_version >= 9)
3266             p += CACHE_ITEM_SIZE_SKIP;
3267     }
3268     else {
3269         /* read header part */
3270         body->header_offset = CACHE_ITEM_BIT32(p);
3271         p += CACHE_ITEM_SIZE_SKIP;
3272         body->header_size = CACHE_ITEM_BIT32(p);
3273         p += CACHE_ITEM_SIZE_SKIP;
3274         body->content_offset = CACHE_ITEM_BIT32(p);
3275         p += CACHE_ITEM_SIZE_SKIP;
3276         body->content_size = CACHE_ITEM_BIT32(p);
3277         p += CACHE_ITEM_SIZE_SKIP;
3278         cte = CACHE_ITEM_BIT32(p);
3279         p += CACHE_ITEM_SIZE_SKIP;
3280 
3281         /* read encoding and charset identifier */
3282         /* Cache versions <= 3 store charset and encoding in 4 bytes,
3283          * but the code was broken. Just presume the charset unknown. */
3284         body->charset_enc = cte & 0xff;
3285         body->charset_id = NULL;
3286         if (cache_version >= 4) {
3287             /* determine the length of the charset identifer */
3288             len = (cte >> 16) & 0xffff;
3289             if (len) {
3290                 /* XXX - assert (cte & 0xff00) == 0x100 */
3291                 /* read len bytes as charset id */
3292                 body->charset_id = xstrndup(p, len);
3293                 p += len;
3294             }
3295         }
3296         if (cache_version >= 5)
3297             p = message_guid_import(&body->content_guid, p);
3298 
3299         if (cache_version >= 8) {
3300             body->decoded_content_size = CACHE_ITEM_BIT32(p);
3301             p += CACHE_ITEM_SIZE_SKIP;
3302         }
3303         if (cache_version >= 9) {
3304             body->content_lines = CACHE_ITEM_BIT32(p);
3305             p += CACHE_ITEM_SIZE_SKIP;
3306         }
3307     }
3308 
3309     /* read body parts */
3310     for (i = 0; i < n-1; i++) {
3311         subpart[i].header_offset = CACHE_ITEM_BIT32(p);
3312         p += CACHE_ITEM_SIZE_SKIP;
3313         subpart[i].header_size = CACHE_ITEM_BIT32(p);
3314         p += CACHE_ITEM_SIZE_SKIP;
3315         subpart[i].content_offset = CACHE_ITEM_BIT32(p);
3316         p += CACHE_ITEM_SIZE_SKIP;
3317         subpart[i].content_size = CACHE_ITEM_BIT32(p);
3318         p += CACHE_ITEM_SIZE_SKIP;
3319         cte = CACHE_ITEM_BIT32(p);
3320         p += CACHE_ITEM_SIZE_SKIP;
3321 
3322         /* read encoding and charset identifier */
3323         /* Cache versions <= 3 store charset and encoding in 4 bytes,
3324          * but the code was broken. Just presume the charset unknown. */
3325         subpart[i].charset_enc = cte & 0xff;
3326         subpart[i].charset_id = NULL;
3327         if (cache_version >= 4) {
3328             /* determine the length of the charset identifer */
3329             len = (cte >> 16) & 0xffff;
3330             if (len) {
3331                 /* XXX - assert (cte & 0xff00) == 0x100 */
3332                 /* read len bytes as charset id */
3333                 subpart[i].charset_id = xstrndup(p, len);
3334                 p += len;
3335             }
3336         }
3337         if (cache_version >= 5)
3338             p = message_guid_import(&subpart[i].content_guid, p);
3339 
3340         if (cache_version >= 8) {
3341             subpart[i].decoded_content_size = CACHE_ITEM_BIT32(p);
3342             p += CACHE_ITEM_SIZE_SKIP;
3343         }
3344         if (cache_version >= 9) {
3345             subpart[i].content_lines = CACHE_ITEM_BIT32(p);
3346             p += CACHE_ITEM_SIZE_SKIP;
3347         }
3348     }
3349 
3350     /* read sub-parts */
3351     for (*sect = p, i = 0; i < n-1; i++) {
3352         message_read_binarybody(&subpart[i], sect, cache_version);
3353     }
3354 }
3355 
3356 /*
3357  * Read cached envelope, binary bodystructure response and binary bodystructure
3358  * of the specified record.  Populates 'body' which must be freed by the caller.
3359  */
message_read_bodystructure(const struct index_record * record,struct body ** body)3360 EXPORTED void message_read_bodystructure(const struct index_record *record, struct body **body)
3361 {
3362     struct protstream *strm;
3363     struct body toplevel;
3364     const char *binbody;
3365 
3366     memset(&toplevel, 0, sizeof(struct body));
3367     toplevel.type = "MESSAGE";
3368     toplevel.subtype = "RFC822";
3369     toplevel.subpart = *body = xzmalloc(sizeof(struct body));
3370 
3371     /* Read envelope response from cache */
3372     strm = prot_readmap(cacheitem_base(record, CACHE_ENVELOPE),
3373                         cacheitem_size(record, CACHE_ENVELOPE));
3374     prot_setisclient(strm, 1);  /* no-sync literals */
3375 
3376     message_read_envelope(strm, *body);
3377     prot_free(strm);
3378 
3379     /* Read bodystructure response from cache */
3380     strm = prot_readmap(cacheitem_base(record, CACHE_BODYSTRUCTURE),
3381                         cacheitem_size(record, CACHE_BODYSTRUCTURE));
3382     prot_setisclient(strm, 1);  /* no-sync literals */
3383 
3384     message_read_body(strm, *body, NULL);
3385     prot_free(strm);
3386 
3387     /* Read binary bodystructure from cache */
3388     binbody = cacheitem_base(record, CACHE_SECTION);
3389     message_read_binarybody(&toplevel, &binbody, record->cache_version);
3390 }
3391 
de_nstring_buf(struct buf * src,struct buf * dst)3392 static void de_nstring_buf(struct buf *src, struct buf *dst)
3393 {
3394     char *p, *q;
3395 
3396     if (src->s && src->len == 3 && !memcmp(src->s, "NIL", 3)) {
3397         buf_free(dst);
3398         return;
3399     }
3400     buf_cstring(src); /* ensure nstring parse doesn't overrun */
3401     q = src->s;
3402     p = parse_nstring(&q);
3403     buf_setmap(dst, p, q-p);
3404     buf_cstring(dst);
3405 }
3406 
message1_get_subject(const struct index_record * record,struct buf * buf)3407 static void message1_get_subject(const struct index_record *record, struct buf *buf)
3408 {
3409     struct buf tmp = BUF_INITIALIZER;
3410     buf_copy(&tmp, cacheitem_buf(record, CACHE_SUBJECT));
3411     de_nstring_buf(&tmp, buf);
3412     buf_free(&tmp);
3413 }
3414 
3415 /*
3416  * Generate a conversation id from the given message.
3417  * The conversation id is derived from the first 64b of
3418  * the SHA1 of the message, except that an all-zero
3419  * conversation id is not valid.
3420  */
generate_conversation_id(const struct index_record * record)3421 static conversation_id_t generate_conversation_id(
3422                             const struct index_record *record)
3423 {
3424     conversation_id_t cid = 0;
3425     size_t i;
3426 
3427     assert(record->guid.status == GUID_NONNULL);
3428 
3429     for (i = 0 ; i < sizeof(cid) ; i++) {
3430         cid <<= 8;
3431         cid |= record->guid.value[i];
3432     }
3433 
3434     // we make sure the cid doesn't look anything like the sha1 so
3435     // that people don't make assumptions
3436     cid ^= 0x91f3d9e10b690b12; // chosen by fair dice roll
3437 
3438     /*
3439      * We carefully avoid returning NULLCONVERSATION as
3440      * a new cid, as that would confuse matters no end.
3441      */
3442     if (cid == NULLCONVERSATION)
3443         cid = 1;
3444 
3445     return cid;
3446 }
3447 
/*
 * In RFC 2822, the In-Reply-To field is explicitly required to contain
 * only message-ids, whitespace and commas.  The old RFC 822 was less
 * well specified and allowed all sorts of stuff.  We used to be equally
 * liberal here in parsing the field.  Sadly some versions of the NMH
 * mailer will generate In-Reply-To containing email addresses which we
 * cannot tell from message-ids, leading to massively confused
 * threading.  So we have to be slightly stricter.
 *
 * Returns 1 if 'p' is NULL, or if, after skipping leading whitespace
 * and commas, the field is empty or starts with '<'.  Returns 0
 * otherwise.
 */
static int is_valid_rfc2822_inreplyto(const char *p)
{
    if (!p)
        return 1;

    /* skip any whitespace and commas; the cast keeps isspace() well
     * defined for bytes >= 0x80 on platforms where char is signed */
    while (*p && (isspace((unsigned char)*p) || *p == ','))
        p++;

    return (!*p || *p == '<');
}
3468 
3469 /* XXX - refactor this whole thing to an "open or create" API */
getconvmailbox(const char * mboxname,struct mailbox ** mailboxptr)3470 static int getconvmailbox(const char *mboxname, struct mailbox **mailboxptr)
3471 {
3472     int r = mailbox_open_iwl(mboxname, mailboxptr);
3473     if (r != IMAP_MAILBOX_NONEXISTENT) return r;
3474 
3475     struct mboxlock *namespacelock = mboxname_usernamespacelock(mboxname);
3476 
3477     // try again - maybe we lost the race!
3478     r = mailbox_open_iwl(mboxname, mailboxptr);
3479     if (r == IMAP_MAILBOX_NONEXISTENT) {
3480         /* create the mailbox - it's OK to do as admin because this only ever gets
3481          * a user subfolder for this conversations.db owner */
3482         r = mboxlist_createmailbox(mboxname, MBTYPE_COLLECTION, NULL, 1 /* admin */, NULL, NULL,
3483                                    0, 0, 0, 0, mailboxptr);
3484     }
3485 
3486     mboxname_release(&namespacelock);
3487 
3488     return r;
3489 }
3490 
3491 /*
3492  * This is the legacy code version to generate conversation subjects.
3493  * We keep it here to allow matching messages to conversations that
3494  * already got that oldstyle subject set.
3495  */
3496 /*
 * Normalise a subject string, to a form which can be used for deciding
 * whether a message belongs in the same conversation as its antecedent
 * messages.  What we're doing here is the same idea as the "base
 * subject" algorithm described in RFC 5256 but slightly adapted from
 * experience.  Differences are:
3502  *
3503  *  - We eliminate all whitespace; RFC 5256 normalises any sequence
3504  *    of whitespace characters to a single SP.  We do this because
3505  *    we have observed combinations of buggy client software both
3506  *    add and remove whitespace around folding points.
3507  *
3508  *  - We include the Unicode U+00A0 (non-breaking space) codepoint in our
3509  *    determination of whitespace (as the UTF-8 sequence \xC2\xA0) because
3510  *    we have seen it in the wild, but do not currently generalise this to
3511  *    other Unicode "whitespace" codepoints. (XXX)
3512  *
3513  *  - Because we eliminate whitespace entirely, and whitespace helps
3514  *    delimit some of our other replacements, we do that whitespace
3515  *    step last instead of first.
3516  *
3517  *  - We eliminate leading tokens like Re: and Fwd: using a simpler
3518  *    and more generic rule than RFC 5256's; this rule catches a number
3519  *    of semantically identical prefixes in other human languages, but
3520  *    unfortunately also catches lots of other things.  We think we can
3521  *    get away with this because the normalised subject is never directly
3522  *    seen by human eyes, so some information loss is acceptable as long
3523  *    as the subjects in different messages match correctly.
3524  *
3525  *  - We eliminate trailing tokens like [SEC=UNCLASSIFIED],
3526  *    [DLM=Sensitive], etc which are automatically added by Australian
3527  *    Government department email systems.  In theory there should be no
3528  *    more than one of these on an email subject but in practice multiple
3529  *    have been seen.
3530  *    http://www.finance.gov.au/files/2012/04/EPMS2012.3.pdf
3531  */
oldstyle_normalise_subject(struct buf * s)3532 static void oldstyle_normalise_subject(struct buf *s)
3533 {
3534     static int initialised_res = 0;
3535     static regex_t whitespace_re;
3536     static regex_t relike_token_re;
3537     static regex_t blob_start_re;
3538     static regex_t blob_end_re;
3539     int r;
3540 
3541     if (!initialised_res) {
3542         r = regcomp(&whitespace_re, "([ \t\r\n]+|\xC2\xA0)", REG_EXTENDED);
3543         assert(r == 0);
3544         r = regcomp(&relike_token_re, "^[ \t]*[A-Za-z0-9]+(\\[[0-9]+\\])?:", REG_EXTENDED);
3545         assert(r == 0);
3546         r = regcomp(&blob_start_re, "^[ \t]*\\[[^]]+\\]", REG_EXTENDED);
3547         assert(r == 0);
3548         r = regcomp(&blob_end_re, "\\[(SEC|DLM)=[^]]+\\][ \t]*$", REG_EXTENDED);
3549         assert(r == 0);
3550         initialised_res = 1;
3551     }
3552 
3553     /* step 1 is to decode any RFC 2047 MIME encoding of the header
3554      * field, but we assume that has already happened */
3555 
3556     /* step 2 is to eliminate all "Re:"-like tokens and [] blobs
3557      * at the start, and AusGov [] blobs at the end */
3558     while (buf_replace_one_re(s, &relike_token_re, NULL) ||
3559            buf_replace_one_re(s, &blob_start_re, NULL) ||
3560            buf_replace_one_re(s, &blob_end_re, NULL))
3561         ;
3562 
3563     /* step 3 is eliminating whitespace. */
3564     buf_replace_all_re(s, &whitespace_re, NULL);
3565 }
3566 
extract_convsubject(const struct index_record * record,struct buf * msubject,void (* normalise)(struct buf *))3567 static void extract_convsubject(const struct index_record *record,
3568                                 struct buf *msubject,
3569                                 void (*normalise)(struct buf*))
3570 {
3571     if (cacheitem_base(record, CACHE_HEADERS)) {
3572         message1_get_subject(record, msubject);
3573         normalise(msubject);
3574     }
3575 }
3576 
message_extract_convsubject(const struct index_record * record)3577 EXPORTED char *message_extract_convsubject(const struct index_record *record)
3578 {
3579     if (cacheitem_base(record, CACHE_HEADERS)) {
3580         struct buf msubject = BUF_INITIALIZER;
3581         extract_convsubject(record, &msubject, conversation_normalise_subject);
3582         return buf_release(&msubject);
3583     }
3584     return NULL;
3585 }
3586 
3587 /*
3588  * Update the conversations database for the given
3589  * mailbox, to account for the given message.
3590  * @body may be NULL, in which case we get everything
3591  * we need out of the cache item in @record.
3592  */
message_update_conversations(struct conversations_state * state,struct mailbox * mailbox,struct index_record * record,conversation_t ** convp)3593 EXPORTED int message_update_conversations(struct conversations_state *state,
3594                                           struct mailbox *mailbox,
3595                                           struct index_record *record,
3596                                           conversation_t **convp)
3597 {
3598     char *hdrs[4];
3599     char *c_refs = NULL, *c_env = NULL, *c_me_msgid = NULL;
3600     strarray_t msgidlist = STRARRAY_INITIALIZER;
3601     arrayu64_t matchlist = ARRAYU64_INITIALIZER;
3602     arrayu64_t cids = ARRAYU64_INITIALIZER;
3603     int mustkeep = 0;
3604     conversation_t *conv = NULL;
3605     char *msubj = NULL;
3606     char *msubj_oldstyle = NULL;
3607     int i;
3608     int j;
3609     int r = 0;
3610     struct mailbox *local_mailbox = NULL;
3611 
3612     /*
3613      * Gather all the msgids mentioned in the message, starting with
3614      * the oldest message in the References: header, then any mesgids
3615      * mentioned in the In-Reply-To: header, and finally the message's
3616      * own Message-Id:.  In general this will result in duplicates (a
3617      * correct References: header will contain as its last entry the
3618      * msgid in In-Reply-To:), so we weed those out before proceeding
3619      * to the database.
3620      */
3621     if (cacheitem_base(record, CACHE_HEADERS)) {
3622         /* we have cache loaded, get what we need there */
3623         strarray_t want = STRARRAY_INITIALIZER;
3624         char *envtokens[NUMENVTOKENS];
3625 
3626         /* get References from cached headers */
3627         c_refs = xstrndup(cacheitem_base(record, CACHE_HEADERS),
3628                           cacheitem_size(record, CACHE_HEADERS));
3629         strarray_append(&want, "references");
3630         message_pruneheader(c_refs, &want, 0);
3631         hdrs[0] = c_refs;
3632 
3633         /* get In-Reply-To, Message-ID out of the envelope
3634          *
3635          * get a working copy; strip outer ()'s
3636          * +1 -> skip the leading paren
3637          * -2 -> don't include the size of the outer parens
3638          */
3639         c_env = xstrndup(cacheitem_base(record, CACHE_ENVELOPE) + 1,
3640                          cacheitem_size(record, CACHE_ENVELOPE) - 2);
3641         parse_cached_envelope(c_env, envtokens, NUMENVTOKENS);
3642         hdrs[1] = envtokens[ENV_INREPLYTO];
3643         hdrs[2] = envtokens[ENV_MSGID];
3644 
3645         /* get X-ME-Message-ID from cached headers */
3646         c_me_msgid = xstrndup(cacheitem_base(record, CACHE_HEADERS),
3647                               cacheitem_size(record, CACHE_HEADERS));
3648         strarray_set(&want, 0, "x-me-message-id");
3649         message_pruneheader(c_me_msgid, &want, 0);
3650         hdrs[3] = c_me_msgid;
3651 
3652         strarray_fini(&want);
3653 
3654         /* work around stupid message_guid API */
3655         message_guid_isnull(&record->guid);
3656     }
3657     else {
3658         /* nope, now we're screwed */
3659         return IMAP_INTERNAL;
3660     }
3661 
3662     if (!is_valid_rfc2822_inreplyto(hdrs[1]))
3663         hdrs[1] = NULL;
3664 
3665     /* Note that a NULL subject, e.g. due to a missing Subject: header
3666      * field in the original message, is normalised to "" not NULL */
3667     if (cacheitem_base(record, CACHE_HEADERS)) {
3668         struct buf msubject = BUF_INITIALIZER;
3669         extract_convsubject(record, &msubject, conversation_normalise_subject);
3670         msubj = xstrdup(buf_cstring(&msubject));
3671         buf_reset(&msubject);
3672         extract_convsubject(record, &msubject, oldstyle_normalise_subject);
3673         msubj_oldstyle = buf_release(&msubject);
3674     }
3675 
3676     for (i = 0 ; i < 4 ; i++) {
3677         int hcount = 0;
3678         char *msgid = NULL;
3679         while ((msgid = find_msgid(hdrs[i], &hdrs[i])) != NULL) {
3680             hcount++;
3681             if (hcount > 20) {
3682                 free(msgid);
3683                 syslog(LOG_DEBUG, "too many references, skipping the rest");
3684                 break;
3685             }
3686             /*
3687              * The issue of case sensitivity of msgids is curious.
3688              * RFC 2822 seems to imply they're case-insensitive,
3689              * without explicitly stating so.  So here we punt
3690              * on that being the case.
3691              *
3692              * Note that the THREAD command elsewhere in Cyrus
3693              * assumes otherwise.
3694              */
3695             msgid = lcase(msgid);
3696 
3697             /* already seen this one? */
3698             if (strarray_find(&msgidlist, msgid, 0) >= 0) {
3699                 free(msgid);
3700                 continue;
3701             }
3702 
3703             /* won't be accepted as valid, ignore it! */
3704             if (conversations_check_msgid(msgid, strlen(msgid))) {
3705                 free(msgid);
3706                 continue;
3707             }
3708 
3709             strarray_appendm(&msgidlist, msgid);
3710 
3711             /* Lookup the conversations database to work out which
3712              * conversation ids that message belongs to. */
3713             r = conversations_get_msgid(state, msgid, &cids);
3714             if (r) goto out;
3715 
3716             for (j = 0; j < cids.count; j++) {
3717                 conversation_id_t cid = arrayu64_nth(&cids, j);
3718                 conversation_free(conv);
3719                 conv = NULL;
3720                 r = conversation_load(state, cid, &conv);
3721                 if (r) goto out;
3722                 /* [IRIS-1576] if X-ME-Message-ID says the messages are
3723                 * linked, ignore any difference in Subject: header fields. */
3724                 if (!conv || i == 3 || !conv->subject ||
3725                         !strcmpsafe(conv->subject, msubj) ||
3726                         !strcmpsafe(conv->subject, msubj_oldstyle)) {
3727                     arrayu64_add(&matchlist, cid);
3728                 }
3729             }
3730 
3731             conversation_free(conv);
3732             conv = NULL;
3733         }
3734     }
3735 
3736     /* calculate the CID if needed */
3737     if (!record->silentupdate) {
3738         /* match for GUID, it always has the same CID */
3739         conversation_id_t currentcid = conversations_guid_cid_lookup(state, message_guid_encode(&record->guid));
3740         if (currentcid) {
3741             // would love to have this, but might hit bogus broken existing data...
3742             // assert(record->cid == 0 || record->cid == currentcid);
3743             record->cid = currentcid;
3744             mustkeep = 1;
3745         }
3746         if (!record->cid) record->cid = arrayu64_max(&matchlist);
3747         if (!record->cid) {
3748             record->cid = generate_conversation_id(record);
3749             if (record->cid) mustkeep = 1;
3750         }
3751         if (!mustkeep && !record->basecid) {
3752             /* try finding a CID in the match list, or if we came in with it */
3753             struct buf annotkey = BUF_INITIALIZER;
3754             struct buf annotval = BUF_INITIALIZER;
3755             buf_printf(&annotkey, "%snewcid/%016llx", IMAP_ANNOT_NS, record->cid);
3756             r = annotatemore_lookup(state->annotmboxname, buf_cstring(&annotkey), "", &annotval);
3757             if (annotval.len == 16) {
3758                 const char *p = buf_cstring(&annotval);
3759                 /* we have a new canonical CID */
3760                 record->basecid = record->cid;
3761                 r = parsehex(p, &p, 16, &record->cid);
3762             }
3763             else {
3764                 r = 0; /* we're just going to pretend this wasn't found, worst case we split
3765                         * more than we should */
3766             }
3767             buf_free(&annotkey);
3768             buf_free(&annotval);
3769             if (r) goto out;
3770         }
3771     }
3772 
3773     if (!record->cid) goto out;
3774     if (!record->basecid) record->basecid = record->cid;
3775 
3776     r = conversation_load(state, record->cid, &conv);
3777     if (r) goto out;
3778 
3779     if (!conv) conv = conversation_new();
3780 
3781     uint32_t max_thread = config_getint(IMAPOPT_CONVERSATIONS_MAX_THREAD);
3782     if (conv->exists >= max_thread && !mustkeep && !record->silentupdate) {
3783         /* time to reset the conversation */
3784         conversation_id_t was = record->cid;
3785         record->cid = generate_conversation_id(record);
3786 
3787         syslog(LOG_NOTICE, "splitting conversation for %s %u base:%016llx was:%016llx now:%016llx",
3788                mailbox->name, record->uid, record->basecid, was, record->cid);
3789 
3790         if (!record->basecid) record->basecid = was;
3791 
3792         conversation_free(conv);
3793         r = conversation_load(state, record->cid, &conv);
3794         if (r) goto out;
3795         if (!conv) conv = conversation_new();
3796 
3797         /* and update the pointer for next time */
3798         if (strcmpsafe(state->annotmboxname, mailbox->name)) {
3799             r = getconvmailbox(state->annotmboxname, &local_mailbox);
3800             if (r) goto out;
3801             mailbox = local_mailbox;
3802         }
3803 
3804         struct annotate_state *astate = NULL;
3805         r = mailbox_get_annotate_state(mailbox, 0, &astate);
3806         if (r) goto out;
3807 
3808         struct buf annotkey = BUF_INITIALIZER;
3809         struct buf annotval = BUF_INITIALIZER;
3810         buf_printf(&annotkey, "%snewcid/%016llx", IMAP_ANNOT_NS, record->basecid);
3811         buf_printf(&annotval, "%016llx", record->cid);
3812         r = annotate_state_write(astate, buf_cstring(&annotkey), "", &annotval);
3813         buf_free(&annotkey);
3814         buf_free(&annotval);
3815         if (r) goto out;
3816     }
3817 
3818     /* Create the subject header if not already set and this isn't a Draft */
3819     if (!conv->subject && !(record->system_flags & FLAG_DRAFT))
3820         conv->subject = xstrdupnull(msubj);
3821 
3822     /*
3823      * Update the database to add records for all the message-ids
3824      * not already mentioned.  Note that add_msgid does the right
3825      * thing[tm] when the cid already exists.
3826      */
3827 
3828     for (i = 0 ; i < msgidlist.count ; i++) {
3829         r = conversations_add_msgid(state, strarray_nth(&msgidlist, i), record->basecid);
3830         if (r) goto out;
3831     }
3832 
3833     /* mark that it's split so basecid gets saved */
3834     if (record->basecid != record->cid)
3835         record->internal_flags |= FLAG_INTERNAL_SPLITCONVERSATION;
3836 
3837 out:
3838     strarray_fini(&msgidlist);
3839     arrayu64_fini(&matchlist);
3840     arrayu64_fini(&cids);
3841     free(c_refs);
3842     free(c_env);
3843     free(c_me_msgid);
3844     free(msubj);
3845     free(msubj_oldstyle);
3846     if (local_mailbox)
3847         mailbox_close(&local_mailbox);
3848 
3849     if (r)
3850         conversation_free(conv);
3851     else if (convp)
3852         *convp = conv;
3853     else {
3854         r = conversation_save(state, record->cid, conv);
3855         conversation_free(conv);
3856     }
3857 
3858     return r;
3859 }
3860 
3861 
/*
  Format of the CACHE_SECTION cache item is a binary encoding of the
  tree of MIME sections.  In something like rpcgen notation
  (see RFC 4506):

    struct part {
        uint32_t header_offset;
        uint32_t header_size;
        uint32_t content_offset;
        uint32_t content_size;

        uint32_t encoding | 0x100 | (len << 16)
                 where len is the length of the charset identifier in bytes
        uint8_t[len] charset identifier
    };

    struct section {
        unsigned int numparts;
        struct part parts[numparts];
        struct section subsections[numparts-1];
    };

  Newer cache versions store further per-part fields after the charset
  identifier; see message_read_binarybody() for the exact layout.
*/
3884 
3885 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
3886 
message_new(void)3887 EXPORTED message_t *message_new(void)
3888 {
3889     message_t *m = xzmalloc(sizeof(*m));
3890 
3891     m->refcount = 1;
3892 
3893     return m;
3894 }
3895 
/*
 * Destroy a message object whose reference count has dropped to zero
 * (asserted): release every resource it holds, then the object itself.
 */
static void message_free(message_t *m)
{
    assert(m->refcount == 0);

    message_yield(m, M_ALL);

    /* message_yield() deliberately leaves resources flagged as 'given'
     * alone, but a given filename is still a private xstrdup()'d copy
     * (see message_new_from_filename) and would leak.  If the filename
     * was yielded above the pointer is already NULL, and free(NULL) is
     * a no-op. */
    free(m->filename);
    m->filename = NULL;

    free(m);
}
3904 
message_set_from_data(const char * base,size_t len,message_t * m)3905 EXPORTED void message_set_from_data(const char *base, size_t len, message_t *m)
3906 {
3907     assert(m->refcount == 1);
3908     message_yield(m, M_ALL);
3909     memset(m, 0, sizeof(message_t));
3910     buf_init_ro(&m->map, base, len);
3911     m->have = m->given = M_MAP;
3912     m->refcount = 1;
3913 }
3914 
message_new_from_data(const char * base,size_t len)3915 EXPORTED message_t *message_new_from_data(const char *base, size_t len)
3916 {
3917     message_t *m = message_new();
3918     buf_init_ro(&m->map, base, len);
3919     m->have = m->given = M_MAP;
3920     return m;
3921 }
3922 
message_set_from_mailbox(struct mailbox * mailbox,unsigned int recno,message_t * m)3923 EXPORTED void message_set_from_mailbox(struct mailbox *mailbox, unsigned int recno, message_t *m)
3924 {
3925     assert(m->refcount == 1);
3926     message_yield(m, M_ALL);
3927     memset(m, 0, sizeof(message_t));
3928     m->mailbox = mailbox;
3929     m->record.recno = recno;
3930     m->have = m->given = M_MAILBOX;
3931     m->refcount = 1;
3932 }
3933 
message_new_from_mailbox(struct mailbox * mailbox,unsigned int recno)3934 EXPORTED message_t *message_new_from_mailbox(struct mailbox *mailbox, unsigned int recno)
3935 {
3936     message_t *m = message_new();
3937     m->mailbox = mailbox;
3938     m->record.recno = recno;
3939     m->have = m->given = M_MAILBOX;
3940     return m;
3941 }
3942 
message_set_from_record(struct mailbox * mailbox,const struct index_record * record,message_t * m)3943 EXPORTED void message_set_from_record(struct mailbox *mailbox,
3944                                       const struct index_record *record,
3945                                       message_t *m)
3946 {
3947     assert(m->refcount == 1);
3948     message_yield(m, M_ALL);
3949     memset(m, 0, sizeof(message_t));
3950     assert(record->uid > 0);
3951     m->mailbox = mailbox;
3952     m->record = *record;
3953     m->have = m->given = M_MAILBOX|M_RECORD|M_UID;
3954     m->refcount = 1;
3955 }
3956 
message_new_from_record(struct mailbox * mailbox,const struct index_record * record)3957 EXPORTED message_t *message_new_from_record(struct mailbox *mailbox,
3958                                             const struct index_record *record)
3959 {
3960     message_t *m = message_new();
3961     assert(record->uid > 0);
3962     m->mailbox = mailbox;
3963     m->record = *record;
3964     m->have = m->given = M_MAILBOX|M_RECORD|M_UID;
3965     return m;
3966 }
3967 
message_set_from_index(struct mailbox * mailbox,const struct index_record * record,uint32_t msgno,uint32_t indexflags,message_t * m)3968 EXPORTED void message_set_from_index(struct mailbox *mailbox,
3969                                      const struct index_record *record,
3970                                      uint32_t msgno,
3971                                      uint32_t indexflags,
3972                                      message_t *m)
3973 {
3974     assert(m->refcount == 1);
3975     message_yield(m, M_ALL);
3976     memset(m, 0, sizeof(message_t));
3977     assert(record->uid > 0);
3978     m->mailbox = mailbox;
3979     m->record = *record;
3980     m->msgno = msgno;
3981     m->indexflags = indexflags;
3982     m->have = m->given = M_MAILBOX|M_RECORD|M_UID|M_INDEX;
3983     m->refcount = 1;
3984 }
3985 
message_new_from_index(struct mailbox * mailbox,const struct index_record * record,uint32_t msgno,uint32_t indexflags)3986 EXPORTED message_t *message_new_from_index(struct mailbox *mailbox,
3987                                            const struct index_record *record,
3988                                            uint32_t msgno,
3989                                            uint32_t indexflags)
3990 {
3991     message_t *m = message_new();
3992     assert(record->uid > 0);
3993     m->mailbox = mailbox;
3994     m->record = *record;
3995     m->msgno = msgno;
3996     m->indexflags = indexflags;
3997     m->have = m->given = M_MAILBOX|M_RECORD|M_UID|M_INDEX;
3998     return m;
3999 }
4000 
message_new_from_filename(const char * filename)4001 EXPORTED message_t *message_new_from_filename(const char *filename)
4002 {
4003     message_t *m = message_new();
4004     m->filename = xstrdup(filename);
4005     m->have = m->given = M_FILENAME;
4006     return m;
4007 }
4008 
message_ref(message_t * m)4009 EXPORTED message_t *message_ref(message_t *m)
4010 {
4011     m->refcount++;
4012     assert(m->refcount >= 1);
4013     return m;
4014 }
4015 
message_unref(message_t ** mp)4016 EXPORTED void message_unref(message_t **mp)
4017 {
4018     message_t *m;
4019 
4020     if (!mp || !(m = *mp)) return;
4021     assert(m->refcount >= 1);
4022     if (--m->refcount == 0)
4023         message_free(m);
4024     *mp = NULL;
4025 }
4026 
4027 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4028 
4029 /*
4030  * Open or create resources which we need but do not yet have.
4031  */
message_need(const message_t * cm,unsigned int need)4032 static int message_need(const message_t *cm, unsigned int need)
4033 {
4034 #define is_missing(flags)    ((need & ~(m->have)) & (flags))
4035 #define found(flags)         (m->have |= (flags))
4036     int r = 0;
4037     message_t *m = (message_t *)cm;
4038 
4039     if (!is_missing(M_ALL))
4040         return 0;       /* easy, we already have it */
4041 
4042     if (is_missing(M_MAILBOX)) {
4043         /* We can't get this for ourselves,
4044          * it needs to be passed in by the caller */
4045         return IMAP_NOTFOUND;
4046     }
4047 
4048     if (is_missing(M_FILENAME)) {
4049         const char *filename;
4050         r = message_need(m, M_MAILBOX|M_RECORD);
4051         if (r) return r;
4052         filename = mailbox_record_fname(m->mailbox, &m->record);
4053         if (!filename) return IMAP_NOTFOUND;
4054         m->filename = xstrdup(filename);
4055         found(M_FILENAME);
4056     }
4057 
4058     if (is_missing(M_RECORD|M_UID)) {
4059         r = message_need(m, M_MAILBOX);
4060         if (r) return r;
4061         r = mailbox_reload_index_record(m->mailbox, &m->record);
4062         if (r) return r;
4063         found(M_RECORD|M_UID);
4064     }
4065 
4066     if (is_missing(M_MAP)) {
4067         r = message_need(m, M_FILENAME);
4068         if (r) return r;
4069         r = message_map_file(m, m->filename);
4070         if (r) return r;
4071         found(M_MAP);
4072     }
4073 
4074     if (is_missing(M_CACHE)) {
4075         r = message_need(m, M_MAILBOX|M_RECORD);
4076         if (r) return r;
4077         r = mailbox_cacherecord(m->mailbox, &m->record);
4078         if (r) return r;
4079         found(M_CACHE);
4080     }
4081 
4082     if (is_missing(M_CACHEBODY)) {
4083         if (message_need(m, M_CACHE) == 0) {
4084             r = message_parse_cbodystructure(m);
4085             if (r) return r;
4086             found(M_CACHEBODY);
4087         }
4088         else
4089             return message_need(m, M_FULLBODY);
4090     }
4091 
4092     if (is_missing(M_FULLBODY)) {
4093         r = message_need(m, M_MAP);
4094         if (r) return r;
4095         m->body = (struct body *)xzmalloc(sizeof(struct body));
4096         r = message_parse_mapped(m->map.s, m->map.len, m->body, NULL);
4097         if (r) return r;
4098         found(M_CACHEBODY|M_FULLBODY);
4099     }
4100 
4101     /* Check that we got everything we asked for and could get */
4102     assert(!is_missing(M_ALL));
4103 
4104     return 0;
4105 #undef found
4106 #undef is_missing
4107 }
4108 
4109 /*
4110  * Yield open resources.
4111  */
message_yield(message_t * m,unsigned int yield)4112 static void message_yield(message_t *m, unsigned int yield)
4113 {
4114     /* Can only yield those resources we have. */
4115     yield &= m->have;
4116 
4117     /* Do not yield resources we were given at initialisation
4118      * time, they cannot be rebuilt again later. */
4119     yield &= ~m->given;
4120 
4121     /* nothing to free for these - they're not constructed
4122      * or have no dynamically allocated memory */
4123     yield &= ~(M_MAILBOX|M_RECORD|M_UID|M_CACHE);
4124 
4125     if ((yield & M_MAP)) {
4126         buf_free(&m->map);
4127         m->have &= ~M_MAP;
4128     }
4129 
4130     if ((yield & M_BODY)) {
4131         message_free_body(m->body);
4132         free(m->body);
4133         m->body = NULL;
4134         m->have &= ~M_BODY;
4135     }
4136 
4137     if ((yield & M_FILENAME)) {
4138         free(m->filename);
4139         m->filename = NULL;
4140         m->have &= ~M_FILENAME;
4141     }
4142 
4143     /* Check we yielded everything we could */
4144     assert((yield & m->have) == 0);
4145 }
4146 
4147 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4148 
4149 /*
4150  * Parse various information out of the cyrus.cache.
4151  */
4152 
4153 /*
4154  * Skip either a single NIL or a balanced possibly-nested list of
4155  * nstrings.  Useful for ignoring various constructs from the
4156  * BODYSTRUCTURE cache.
4157  */
skip_nil_or_nstring_list(struct protstream * prot)4158 static int skip_nil_or_nstring_list(struct protstream *prot)
4159 {
4160     int r = IMAP_MAILBOX_BADFORMAT;
4161     int c;
4162     struct buf word = BUF_INITIALIZER;
4163 
4164     c = prot_getc(prot);
4165     if (c == EOF)
4166         goto out;   /* ran out of data */
4167     if (c == '(') {
4168         /* possibly-nested list of atoms */
4169         int treedepth = 1;
4170         do {
4171             c = prot_getc(prot);
4172             if (c == ' ')
4173                 c = prot_getc(prot);
4174             if (c != ')' && c != '(') {
4175                 prot_ungetc(c, prot);
4176                 c = getnstring(prot, NULL, &word);
4177 #if DEBUG
4178                 if (word.len)
4179                     fprintf(stderr, "%sskipping string \"%s\" at %d\n",
4180                             indent(depth), word.s, treedepth);
4181 #endif
4182             }
4183             if (c == '(')
4184                 treedepth++;
4185             else if (c == ')')
4186                 treedepth--;
4187             else if (c == ' ')
4188                 prot_ungetc(c, prot);
4189             else
4190                 goto out;
4191         } while (treedepth);
4192         c = prot_getc(prot);
4193         if (c != ' ') goto out;
4194         r = 0;
4195     }
4196     else {
4197         prot_ungetc(c, prot);
4198         c = getnstring(prot, NULL, &word);
4199         if (c == ' ' && !word.len) {
4200             /* 'NIL' */
4201 #if DEBUG
4202             fprintf(stderr, "%sskipping NIL\n", indent(depth));
4203 #endif
4204             r = 0;
4205             goto out;
4206         }
4207     }
4208     /* else, error */
4209 
4210 out:
4211     buf_free(&word);
4212     return r;
4213 }
4214 
parse_mime_params(struct protstream * prot,struct param ** prev)4215 static int parse_mime_params(struct protstream *prot, struct param **prev)
4216 {
4217     int c;
4218     struct buf key = BUF_INITIALIZER;
4219     struct buf val = BUF_INITIALIZER;
4220     struct param *param;
4221 
4222     c = prot_getc(prot);
4223     if (c != '(') {
4224         /* must be NIL */
4225         if (c != 'N') goto err;
4226         c = prot_getc(prot);
4227         if (c != 'I') goto err;
4228         c = prot_getc(prot);
4229         if (c != 'L') goto err;
4230         return prot_getc(prot);
4231     }
4232 
4233     /* otherwise we have a list */
4234     do {
4235         c = getnstring(prot, NULL, &key);
4236         if (c != ' ') goto err;
4237         c = getnstring(prot, NULL, &val);
4238         if (c != ' ' && c != ')') goto err;
4239         param = (struct param *)xzmalloc(sizeof(struct param));
4240         param->attribute = buf_releasenull(&key);
4241         param->value = buf_releasenull(&val);
4242         *prev = param;
4243         prev = &param->next;
4244     } while (c == ' ');
4245 
4246     return prot_getc(prot);
4247 
4248 err:
4249     buf_free(&key);
4250     buf_free(&val);
4251     return EOF;
4252 }
4253 
parse_bodystructure_part(struct protstream * prot,struct body * body,const char * part_id)4254 static int parse_bodystructure_part(struct protstream *prot, struct body *body, const char *part_id)
4255 {
4256     int c;
4257     int r = 0;
4258     struct buf buf = BUF_INITIALIZER;
4259 
4260     memset(body, 0, sizeof(struct body));
4261 
4262     c = prot_getc(prot);
4263     if (c != '(') {
4264 badformat:
4265         r = IMAP_MAILBOX_BADFORMAT;
4266         goto out;
4267     }
4268 
4269     c = prot_getc(prot);
4270     prot_ungetc(c, prot);
4271     if (c == '(') {
4272         while (c == '(') {
4273             body->numparts++;
4274             body->subpart = (struct body *)xrealloc((char *)body->subpart,
4275                                           body->numparts*sizeof(struct body));
4276 
4277             buf_reset(&buf);
4278             if (part_id) buf_printf(&buf, "%s.", part_id);
4279             buf_printf(&buf, "%d", body->numparts);
4280             char *part_id = buf_release(&buf);
4281             struct body *subbody = &body->subpart[body->numparts-1];
4282             r = parse_bodystructure_part(prot, subbody, part_id);
4283             subbody->part_id = part_id;
4284             if (r) goto out;
4285 
4286             c = prot_getc(prot);
4287             prot_ungetc(c, prot);
4288         }
4289 
4290         c = prot_getc(prot);
4291         if (c != ' ') goto badformat;
4292 
4293         body->type = xstrdup("MULTIPART");
4294     }
4295     else {
4296         /* parse mime-type */
4297         c = getnstring(prot, NULL, &buf);
4298         if (c != ' ') goto badformat;
4299 
4300         body->type = buf_releasenull(&buf);
4301     }
4302 
4303     /* parse mime-subtype */
4304     c = getnstring(prot, NULL, &buf);
4305     if (c != ' ') goto badformat;
4306     body->subtype = buf_releasenull(&buf);
4307 
4308     /* parse mime-params */
4309     c = parse_mime_params(prot, &body->params);
4310     if (c != ' ') goto badformat;
4311 
4312     if (strcmp(body->type, "MULTIPART")) {
4313         /* msgid */
4314         c = getnstring(prot, NULL, &buf);
4315         if (c != ' ') goto badformat;
4316         body->message_id = buf_releasenull(&buf);
4317 
4318         /* description */
4319         c = getnstring(prot, NULL, &buf);
4320         if (c != ' ') goto badformat;
4321         body->description = buf_releasenull(&buf);
4322 
4323         /* encoding */
4324         c = getnstring(prot, NULL, &buf);
4325         if (c != ' ') goto badformat;
4326         body->encoding = buf_releasenull(&buf);
4327 
4328         /* content-size */
4329         c = getword(prot, &buf);
4330         if (c != ' ') goto badformat;
4331         body->content_size = atoi(buf_cstring(&buf));
4332 
4333         if (!strcmpsafe(body->type, "TEXT")) {
4334             /* parse content-lines */
4335             c = getword(prot, &buf);
4336             if (c != ' ') goto badformat;
4337             body->content_lines = atoi(buf_cstring(&buf));
4338         }
4339 
4340         else if (!strcmpsafe(body->type, "MESSAGE") &&
4341                  !strcmpsafe(body->subtype, "RFC822")) {
4342             body->numparts = 1;
4343             body->subpart = xzmalloc(sizeof(struct body));
4344 
4345             /* skip envelope */
4346             r = skip_nil_or_nstring_list(prot);
4347             if (r) goto out;
4348 
4349             /* process body */
4350             r = parse_bodystructure_part(prot, body->subpart, part_id);
4351             if (r) goto out;
4352 
4353             /* skip trailing space (parse_bs_part doesn't eat it) */
4354             c = prot_getc(prot);
4355             if (c != ' ') goto badformat;
4356 
4357             /* parse content-lines */
4358             c = getword(prot, &buf);
4359             if (c != ' ') goto badformat;
4360             body->content_lines = atoi(buf_cstring(&buf));
4361         }
4362 
4363         /* parse md5sum */
4364         c = getnstring(prot, NULL, &buf);
4365         if (c != ' ') goto badformat;
4366         body->md5 = buf_releasenull(&buf);
4367     }
4368 
4369     /* skips disposition-and-params */
4370     r = skip_nil_or_nstring_list(prot);
4371     if (r) goto out;
4372 
4373     /* parse languages */  /* TODO */
4374     r = skip_nil_or_nstring_list(prot);
4375     if (r) goto out;
4376 
4377     /* location */
4378     c = getnstring(prot, NULL, &buf);
4379     if (c != ')') goto badformat; /* final field */
4380     body->location = buf_releasenull(&buf);
4381 
4382     r = 0;
4383 out:
4384     buf_free(&buf);
4385     return r;
4386 }
4387 
parse_bodystructure_sections(const char ** cachestrp,const char * cacheend,struct body * body,uint32_t cache_version,const char * part_id)4388 static int parse_bodystructure_sections(const char **cachestrp, const char *cacheend,
4389                                         struct body *body, uint32_t cache_version,
4390                                         const char *part_id)
4391 {
4392     struct body *this;
4393     int nsubparts;
4394     int part;
4395     uint32_t cte;
4396     struct buf buf = BUF_INITIALIZER;
4397     int r = 0;
4398 
4399     if (*cachestrp + 4 > cacheend) {
4400         r = IMAP_MAILBOX_BADFORMAT;
4401         goto done;
4402     }
4403 
4404     nsubparts = CACHE_ITEM_BIT32(*cachestrp);
4405     *cachestrp += 4;
4406 
4407     /* XXX - this size needs increasing for charset sizes and sha1s depending on version,
4408      * it won't crash, but it may overrun while reading */
4409     if (*cachestrp + 4*5*nsubparts > cacheend) {
4410         r = IMAP_MAILBOX_BADFORMAT;
4411         goto done;
4412     }
4413 
4414     if (strcmp(body->type, "MESSAGE") == 0
4415         && strcmp(body->subtype, "RFC822") == 0) {
4416 
4417         if (strcmp(body->subpart->type, "MULTIPART") == 0) {
4418 
4419             /*
4420              * Part 0 of a message/rfc822 is the message header/text.
4421              * Nested parts of a message/rfc822 containing a multipart
4422              * are the sub-parts of the multipart.
4423              */
4424             if (body->subpart->numparts + 1 != nsubparts) {
4425                 r = IMAP_MAILBOX_BADFORMAT;
4426                 goto done;
4427             }
4428 
4429             body->subpart->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4430             body->subpart->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4431             body->subpart->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4432             body->subpart->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4433             // skip cte
4434             *cachestrp += 5*4;
4435 
4436             if (cache_version >= 5)
4437                 *cachestrp = message_guid_import(&body->subpart->content_guid, *cachestrp);
4438 
4439             if (cache_version >= 8) {
4440                 body->subpart->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4441                 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4442             }
4443 
4444             if (cache_version >= 9) {
4445                 body->subpart->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4446                 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4447             }
4448 
4449             for (part = 0; part < body->subpart->numparts; part++) {
4450                 this = &body->subpart->subpart[part];
4451                 this->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4452                 this->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4453                 this->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4454                 this->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4455                 cte = CACHE_ITEM_BIT32(*cachestrp+4*4);
4456                 *cachestrp += 5*4;
4457 
4458                 /* XXX CACHE_MINOR_VERSION 4 replaces numeric charset
4459                  * identifiers with variable-length strings. Remove
4460                  * this conditional once cache versions <= 3 are
4461                  * deprecated */
4462                 if (cache_version >= 4)
4463                     *cachestrp += (cte >> 16) & 0xffff;
4464 
4465                 /* CACHE_MINOR_VERSION 5 adds a sha1 after the charset */
4466                 if (cache_version >= 5)
4467                     *cachestrp = message_guid_import(&this->content_guid, *cachestrp);
4468 
4469                 /* CACHE_MINOR_VERSION 8 adds the decoded content size after sha1 */
4470                 if (cache_version >= 8) {
4471                     this->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4472                     *cachestrp += CACHE_ITEM_SIZE_SKIP;
4473                 }
4474 
4475                 /* CACHE_MINOR_VERSION 9 adds the number of content lines after the decoded size */
4476                 if (cache_version >= 9) {
4477                     this->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4478                     *cachestrp += CACHE_ITEM_SIZE_SKIP;
4479                 }
4480             }
4481 
4482             /* and parse subparts */
4483             for (part = 0; part < body->subpart->numparts; part++) {
4484                 this = &body->subpart->subpart[part];
4485                 buf_reset(&buf);
4486                 if (part_id) buf_printf(&buf, "%s.", part_id);
4487                 buf_printf(&buf, "%d", part + 1);
4488                 if (parse_bodystructure_sections(cachestrp, cacheend, this, cache_version, buf_cstring(&buf))) {
4489                     r = IMAP_MAILBOX_BADFORMAT;
4490                     goto done;
4491                 }
4492             }
4493         }
4494         else {
4495             /*
4496              * Part 0 of a message/rfc822 is the message header/text.
4497              * Part 1 of a message/rfc822 containing a non-multipart
4498              * is the message body.
4499              */
4500 
4501             if (2 != nsubparts) {
4502                 r = IMAP_MAILBOX_BADFORMAT;
4503                 goto done;
4504             }
4505 
4506             /* data is the same in body, just grab the first one */
4507             body->subpart->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4508             body->subpart->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4509             body->subpart->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4510             body->subpart->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4511             // skip cte
4512             *cachestrp += 5*4;
4513             if (cache_version >= 5)
4514                 *cachestrp += MESSAGE_GUID_SIZE;
4515             if (cache_version >= 8)
4516                 *cachestrp += 1*4;
4517             if (cache_version >= 9)
4518                 *cachestrp += 1*4;
4519             *cachestrp += 4*4;
4520 
4521             if (strcmp(body->subpart->type, "MULTIPART") == 0) {
4522                 /* Treat 0-part multipart as 0-length text */
4523                 *cachestrp += 1*4;
4524             }
4525             else {
4526                 /* Skip charset/encoding identifiers. */
4527                 cte = CACHE_ITEM_BIT32(*cachestrp);
4528                 *cachestrp += 1*4;
4529                 /* XXX CACHE_MINOR_VERSION 4 replaces numeric charset
4530                  * identifiers with variable-length strings. Remove
4531                  * this conditional once cache versions <= 3 are
4532                  * deprecated */
4533                 if (cache_version >= 4)
4534                     *cachestrp += (cte >> 16) & 0xffff;
4535 
4536                 if (!body->subpart->part_id) {
4537                     buf_reset(&buf);
4538                     if (part_id) buf_printf(&buf, "%s.", part_id);
4539                     buf_printf(&buf, "%d", 1);
4540                     body->subpart->part_id = buf_release(&buf);
4541                 }
4542             }
4543             /* CACHE_MINOR_VERSION 5 adds a sha1 after the charset */
4544             if (cache_version >= 5)
4545                 *cachestrp = message_guid_import(&body->subpart->content_guid, *cachestrp);
4546 
4547             if (cache_version >= 8) {
4548                 body->subpart->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4549                 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4550             }
4551 
4552             if (cache_version >= 9) {
4553                 body->subpart->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4554                 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4555             }
4556 
4557             /* and parse subpart */
4558             if (parse_bodystructure_sections(cachestrp, cacheend, body->subpart, cache_version, body->part_id)) {
4559                 r = IMAP_MAILBOX_BADFORMAT;
4560                 goto done;
4561             }
4562         }
4563     }
4564     else if (body->numparts) {
4565         /*
4566          * Cannot fetch part 0 of a multipart.
4567          * Nested parts of a multipart are the sub-parts.
4568          */
4569         if (body->numparts + 1 != nsubparts) {
4570             r = IMAP_MAILBOX_BADFORMAT;
4571             goto done;
4572         }
4573         *cachestrp += 5*4;
4574         if (cache_version >= 5)
4575             *cachestrp += MESSAGE_GUID_SIZE;
4576         if (cache_version >= 8)
4577             *cachestrp += 4;
4578         if (cache_version >= 9)
4579             *cachestrp += 4;
4580         for (part = 0; part < body->numparts; part++) {
4581             this = &body->subpart[part];
4582             this->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4583             this->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4584             this->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4585             this->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4586             cte = CACHE_ITEM_BIT32(*cachestrp+4*4);
4587             *cachestrp += 5*4;
4588 
4589             if (cache_version >= 4)
4590                 *cachestrp += (cte >> 16) & 0xffff;
4591 
4592             if (cache_version >= 5)
4593                 *cachestrp = message_guid_import(&this->content_guid, *cachestrp);
4594 
4595             if (cache_version >= 8) {
4596                 this->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4597                 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4598             }
4599 
4600             if (cache_version >= 9) {
4601                 this->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4602                 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4603             }
4604         }
4605 
4606         for (part = 0; part < body->numparts; part++) {
4607             this = &body->subpart[part];
4608             buf_reset(&buf);
4609             if (part_id) buf_printf(&buf, "%s.", part_id);
4610             buf_printf(&buf, "%d", part + 1);
4611             if (parse_bodystructure_sections(cachestrp, cacheend, this, cache_version, buf_cstring(&buf))) {
4612                 r = IMAP_MAILBOX_BADFORMAT;
4613                 goto done;
4614             }
4615         }
4616     }
4617     else {
4618         /*
4619          * Leaf section--no part 0 or nested parts
4620          */
4621         if (nsubparts != 0) {
4622             r = IMAP_MAILBOX_BADFORMAT;
4623             goto done;
4624         }
4625         if (!body->part_id)
4626             body->part_id = xstrdupnull(part_id);
4627     }
4628 
4629 done:
4630     buf_free(&buf);
4631     return r;
4632 }
4633 
/*
 * Build m->body from the cache record of 'm': the textual
 * CACHE_BODYSTRUCTURE item provides the MIME tree and the binary
 * CACHE_SECTION item provides the per-part offsets and sizes.
 *
 * The cache must already be loaded (M_CACHE).  Returns 0 on success.
 * On failure the error is logged, the partially built body is
 * released, m->body is left NULL and a non-zero code is returned.
 */
static int message_parse_cbodystructure(message_t *m)
{
    struct protstream *prot = NULL;
    const char *cachestr = cacheitem_base(&m->record, CACHE_SECTION);
    const char *cacheend = cachestr + cacheitem_size(&m->record, CACHE_SECTION);
    struct body toplevel;
    int r;

    /* We're reading the cache - double check we have it */
    assert(m->have & M_CACHE);

    prot = prot_readmap(cacheitem_base(&m->record, CACHE_BODYSTRUCTURE),
                        cacheitem_size(&m->record, CACHE_BODYSTRUCTURE));
    if (!prot)
        return IMAP_MAILBOX_BADFORMAT;
    prot_setisclient(prot, 1);  /* don't crash parsing literals */

    m->body = xzmalloc(sizeof(struct body));
    r = parse_bodystructure_part(prot, m->body, NULL);
    if (r) {
        syslog(LOG_ERR, "IOERROR: parsing body structure for %s %u (%.*s)",
               m->mailbox->name, m->record.uid,
               (int)cacheitem_size(&m->record, CACHE_BODYSTRUCTURE),
               cacheitem_base(&m->record, CACHE_BODYSTRUCTURE));
        goto done;
    }

    /* The section data describes the message as a whole, so present
     * the parsed body as the single sub-part of a stack-allocated
     * MESSAGE/RFC822 wrapper. */
    memset(&toplevel, 0, sizeof(struct body));
    toplevel.type = "MESSAGE";
    toplevel.subtype = "RFC822";
    toplevel.subpart = m->body;

    r = parse_bodystructure_sections(&cachestr, cacheend, &toplevel, m->record.cache_version, NULL);
    if (r) syslog(LOG_ERR, "IOERROR: parsing section structure for %s %u (%.*s)",
                  m->mailbox->name, m->record.uid,
                  (int)cacheitem_size(&m->record, CACHE_BODYSTRUCTURE),
                  cacheitem_base(&m->record, CACHE_BODYSTRUCTURE));

done:
    if (r) {
        /* Do not leave a half-built body behind: message_yield() only
         * frees the body when it is flagged in m->have, so it would
         * otherwise never be released.  Same teardown as used there. */
        message_free_body(m->body);
        free(m->body);
        m->body = NULL;
    }
    prot_free(prot);

    return r;
}
4675 
4676 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4677 
/*
 * Map the regular file 'fname' into m->map, replacing whatever the
 * buffer held before.
 *
 * Returns 0 on success, the errno value from open() if the file cannot
 * be opened, or EINVAL if it is not a regular file.  A failing fstat()
 * is logged and reported through fatal().
 */
static int message_map_file(message_t *m, const char *fname)
{
    struct stat sbuf;
    int fd = open(fname, O_RDONLY, 0666);

    if (fd == -1) return errno;

    if (fstat(fd, &sbuf) == -1) {
        syslog(LOG_ERR, "IOERROR: fstat on %s: %m", fname);
        fatal("can't fstat message file", EX_OSFILE);
    }

    if (S_ISREG(sbuf.st_mode)) {
        /* drop the previous contents before mapping the new file */
        buf_free(&m->map);
        buf_refresh_mmap(&m->map, /*onceonly*/1, fd, fname, sbuf.st_size,
                         m->mailbox ? m->mailbox->name : NULL);
        close(fd);
        return 0;
    }

    /* directories, devices, ... are refused */
    close(fd);
    return EINVAL;
}
4701 
4702 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4703 
/*
 * Append the type and subtype of 'body' to 'types' unless its type is
 * MULTIPART or MESSAGE, then do the same for each of its sub-parts.
 */
static void body_get_leaf_types(struct body *body, strarray_t *types)
{
    int is_container = !strcmpsafe(body->type, "MULTIPART") ||
                       !strcmpsafe(body->type, "MESSAGE");
    int idx = 0;

    if (!is_container) {
        strarray_append(types, body->type);
        strarray_append(types, body->subtype);
    }

    /* recurse into children */
    while (idx < body->numparts) {
        body_get_leaf_types(&body->subpart[idx], types);
        idx++;
    }
}
4718 
/*
 * Call 'proc' for the sections of 'body' and then, recursively, for
 * each of its sub-parts.  'message' supplies the mapped message text
 * that the section offsets refer to.
 *
 * For each body with a non-zero header_size, 'proc' is first called
 * with isbody == 0 and the raw header bytes.  It is then called with
 * isbody == 1 and the content bytes; for TEXT parts the charset and
 * encoding come from message_parse_charset(), for all other parts the
 * charset is CHARSET_UNKNOWN_CHARSET and the encoding is looked up
 * from body->encoding.
 *
 * Iteration stops at the first non-zero return from 'proc', which is
 * passed back to the caller.  Returns 0 otherwise.
 */
static int body_foreach_section(struct body *body, struct message *message,
                                int (*proc)(int isbody, charset_t charset,
                                    int encoding,
                                    const char *type, const char *subtype,
                                    const struct param *type_params,
                                    const char *disposition,
                                    const struct param *disposition_params,
                                    const struct message_guid *content_guid,
                                    const char *part,
                                    struct buf *data, void *rock),
                                void *rock)
{
    struct buf data = BUF_INITIALIZER;
    int i, r;

    if (body->header_size) {
        struct body *tmpbody = NULL;
        strarray_t boundaries = STRARRAY_INITIALIZER;
        const char *disposition = body->disposition;
        struct param *disposition_params = body->disposition_params;

        if (!disposition) {
            /* XXX hack: body can either be read from the binary cache body
             * or bodystructure, but either misses the contents of the other */
            tmpbody = xzmalloc(sizeof(struct body));
            struct msg msg;

            msg.base = message->map.s + body->header_offset;
            msg.len = body->header_size;
            msg.offset = 0;
            msg.encode = 0;
            message_parse_headers(&msg, tmpbody, "text/plain", &boundaries, NULL);

            /* borrowed from tmpbody, which stays alive until after
             * the callback below has returned */
            disposition = tmpbody->disposition;
            disposition_params = tmpbody->disposition_params;
        }

        buf_init_ro(&data, message->map.s + body->header_offset, body->header_size);
        r = proc(/*isbody*/0, CHARSET_UNKNOWN_CHARSET, 0, body->type, body->subtype,
                 body->params, disposition, disposition_params, &body->content_guid,
                 body->part_id, &data, rock);
        buf_free(&data);

        if (tmpbody) {
            message_free_body(tmpbody);
            free(tmpbody);
        }
        /* release anything the header parse may have put in here */
        strarray_fini(&boundaries);

        if (r) return r;
    }

    if (!strcmpsafe(body->type, "TEXT")) {
        int encoding;
        charset_t charset = CHARSET_UNKNOWN_CHARSET;
        message_parse_charset(body, &encoding, &charset);
        buf_init_ro(&data, message->map.s + body->content_offset, body->content_size);
        r = proc(/*isbody*/1, charset, encoding, body->type, body->subtype,
                 body->params, NULL, NULL, &body->content_guid, body->part_id,
                 &data, rock);
        buf_free(&data);
        charset_free(&charset);
        if (r) return r;
    } else {
        buf_init_ro(&data, message->map.s + body->content_offset, body->content_size);
        r = proc(/*isbody*/1, CHARSET_UNKNOWN_CHARSET, encoding_lookupname(body->encoding),
                 body->type, body->subtype, body->params, NULL, NULL,
                 &body->content_guid, body->part_id, &data, rock);
        buf_free(&data);
        if (r) return r;
    }

    for (i = 0; i < body->numparts; i++) {
        r = body_foreach_section(&body->subpart[i], message, proc, rock);
        if (r) return r;
    }

    return r;
}
4797 
4798 
4799 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4800 
4801 /*
4802  * Iterate 'proc' over all the MIME header sections and body sections of
4803  * type TEXT, in the message 'm', preorder.  The 'proc' is called with
4804  * 'partno' equal to zero for header sections, non-zero for body
4805  * sections.  If 'proc' returns non-zero, the iteration finishes early
4806  * and the return value of 'proc' is returned.  Otherwise returns 0.
4807  */
message_foreach_section(message_t * m,int (* proc)(int isbody,charset_t charset,int encoding,const char * type,const char * subtype,const struct param * type_params,const char * disposition,const struct param * disposition_params,const struct message_guid * content_guid,const char * part,struct buf * data,void * rock),void * rock)4808 EXPORTED int message_foreach_section(message_t *m,
4809                          int (*proc)(int isbody, charset_t charset, int encoding,
4810                                      const char *type, const char *subtype,
4811                                      const struct param *type_params,
4812                                      const char *disposition,
4813                                      const struct param *disposition_params,
4814                                      const struct message_guid *content_guid,
4815                                      const char *part,
4816                                      struct buf *data,
4817                                      void *rock),
4818                          void *rock)
4819 {
4820     int r = message_need(m, M_CACHEBODY|M_MAP);
4821     if (r) return r;
4822     return body_foreach_section(m->body, m, proc, rock);
4823 }
4824 
4825 /*
4826  * Get the MIME content types of all leaf sections, i.e. sections whose
4827  * type is not multipart or message.  Strings are added to the array in
4828  * pairs, type first then subtype.
4829  */
message_get_leaf_types(message_t * m,strarray_t * types)4830 EXPORTED int message_get_leaf_types(message_t *m, strarray_t *types)
4831 {
4832     int r = message_need(m, M_CACHEBODY);
4833     if (r) return r;
4834     body_get_leaf_types(m->body, types);
4835     return 0;
4836 }
4837 
4838 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4839 
message_get_bcc(message_t * m,struct buf * buf)4840 EXPORTED int message_get_bcc(message_t *m, struct buf *buf)
4841 {
4842     return message_get_field(m, "bcc", MESSAGE_RAW, buf);
4843 }
4844 
message_get_deliveredto(message_t * m,struct buf * buf)4845 EXPORTED int message_get_deliveredto(message_t *m, struct buf *buf)
4846 {
4847     int r = message_get_field(m, "X-Original-Delivered-To", MESSAGE_RAW, buf);
4848     if (!r && buf_len(buf) == 0) {
4849         r = message_get_field(m, "X-Delivered-To", MESSAGE_RAW, buf);
4850     }
4851     return r;
4852 }
4853 
message_get_cc(message_t * m,struct buf * buf)4854 EXPORTED int message_get_cc(message_t *m, struct buf *buf)
4855 {
4856     return message_get_field(m, "cc", MESSAGE_RAW, buf);
4857 }
4858 
message_get_to(message_t * m,struct buf * buf)4859 EXPORTED int message_get_to(message_t *m, struct buf *buf)
4860 {
4861     return message_get_field(m, "to", MESSAGE_RAW, buf);
4862 }
4863 
message_get_from(message_t * m,struct buf * buf)4864 EXPORTED int message_get_from(message_t *m, struct buf *buf)
4865 {
4866     return message_get_field(m, "from", MESSAGE_RAW, buf);
4867 }
4868 
message_get_listid(message_t * m,struct buf * buf)4869 EXPORTED int message_get_listid(message_t *m, struct buf *buf)
4870 {
4871     return message_get_field(m, "list-id", MESSAGE_RAW, buf);
4872 }
4873 
message_get_messageid(message_t * m,struct buf * buf)4874 EXPORTED int message_get_messageid(message_t *m, struct buf *buf)
4875 {
4876     return message_get_field(m, "message-id", MESSAGE_RAW, buf);
4877 }
4878 
message_get_subject(message_t * m,struct buf * buf)4879 EXPORTED int message_get_subject(message_t *m, struct buf *buf)
4880 {
4881     return message_get_field(m, "subject", MESSAGE_RAW, buf);
4882 }
4883 
message_get_mailinglist(message_t * m,struct buf * buf)4884 EXPORTED int message_get_mailinglist(message_t *m, struct buf *buf)
4885 {
4886     return message_get_field(m, "mailing-list", MESSAGE_RAW, buf);
4887 }
4888 
message_get_priority(message_t * m,struct buf * buf)4889 EXPORTED int message_get_priority(message_t *m, struct buf *buf)
4890 {
4891     /* Only returns priority value "1" or none. */
4892     int r = message_get_field(m, "X-Priority", MESSAGE_RAW, buf);
4893     buf_trim(buf);
4894     if (!r && !strcmp(buf_cstring(buf), "1")) {
4895         return 0;
4896     }
4897     r = message_get_field(m, "Importance", MESSAGE_RAW, buf);
4898     buf_trim(buf);
4899     if (!r && !strcmp(buf_cstring(buf), "high")) {
4900         buf_setcstr(buf, "1");
4901         return 0;
4902     }
4903     buf_reset(buf);
4904     return r;
4905 }
4906 
msg_record(const message_t * m)4907 EXPORTED const struct index_record *msg_record(const message_t *m)
4908 {
4909     assert(!message_need(m, M_RECORD))
4910     return &m->record;
4911 }
4912 
msg_mailbox(const message_t * m)4913 EXPORTED struct mailbox *msg_mailbox(const message_t *m)
4914 {
4915     assert(!message_need(m, M_MAILBOX))
4916     return m->mailbox;
4917 }
4918 
message_get_size(message_t * m,uint32_t * sizep)4919 EXPORTED int message_get_size(message_t *m, uint32_t *sizep)
4920 {
4921     int r = message_need(m, M_RECORD);
4922     if (!r) {
4923         *sizep = m->record.size;
4924         return 0;
4925     }
4926     r = message_need(m, M_MAP);
4927     if (!r) {
4928         *sizep = buf_len(&m->map);
4929     }
4930     return r;
4931 }
4932 
msg_size(const message_t * m)4933 EXPORTED uint32_t msg_size(const message_t *m)
4934 {
4935     assert(!message_need(m, M_RECORD))
4936     return m->record.size;
4937 }
4938 
message_get_uid(message_t * m,uint32_t * uidp)4939 EXPORTED int message_get_uid(message_t *m, uint32_t *uidp)
4940 {
4941     int r = message_need(m, M_RECORD);
4942     if (r) return r;
4943     *uidp = m->record.uid;
4944     return 0;
4945 }
4946 
msg_uid(const message_t * m)4947 EXPORTED uint32_t msg_uid(const message_t *m)
4948 {
4949     assert(!message_need(m, M_RECORD))
4950     return m->record.uid;
4951 }
4952 
message_get_cid(message_t * m,conversation_id_t * cidp)4953 EXPORTED int message_get_cid(message_t *m, conversation_id_t *cidp)
4954 {
4955     int r = message_need(m, M_RECORD);
4956     if (r) return r;
4957     *cidp = m->record.cid;
4958     return 0;
4959 }
4960 
msg_cid(const message_t * m)4961 EXPORTED conversation_id_t msg_cid(const message_t *m)
4962 {
4963     assert(!message_need(m, M_RECORD))
4964     return m->record.cid;
4965 }
4966 
message_get_modseq(message_t * m,modseq_t * modseqp)4967 EXPORTED int message_get_modseq(message_t *m, modseq_t *modseqp)
4968 {
4969     int r = message_need(m, M_RECORD);
4970     if (r) return r;
4971     *modseqp = m->record.modseq;
4972     return 0;
4973 }
4974 
msg_modseq(const message_t * m)4975 EXPORTED modseq_t msg_modseq(const message_t *m)
4976 {
4977     assert(!message_need(m, M_RECORD))
4978     return m->record.modseq;
4979 }
4980 
message_get_msgno(message_t * m,uint32_t * msgnop)4981 EXPORTED int message_get_msgno(message_t *m, uint32_t *msgnop)
4982 {
4983     int r = message_need(m, M_INDEX);
4984     if (r) return r;
4985     *msgnop = m->msgno;
4986     return 0;
4987 }
4988 
msg_msgno(const message_t * m)4989 EXPORTED uint32_t msg_msgno(const message_t *m)
4990 {
4991     assert(!message_need(m, M_INDEX))
4992     return m->msgno;
4993 }
4994 
message_get_guid(message_t * m,const struct message_guid ** guidp)4995 EXPORTED int message_get_guid(message_t *m, const struct message_guid **guidp)
4996 {
4997     int r = message_need(m, M_RECORD);
4998     if (!r) {
4999         *guidp = &m->record.guid;
5000         return 0;
5001     }
5002     if (message_guid_isnull(&m->guid)) {
5003         r = message_need(m, M_MAP);
5004         if (r) return r;
5005         message_guid_generate(&m->guid, buf_base(&m->map), buf_len(&m->map));
5006     }
5007     *guidp = &m->guid;
5008     return 0;
5009 }
5010 
msg_guid(const message_t * m)5011 EXPORTED const struct message_guid *msg_guid(const message_t *m)
5012 {
5013     assert(!message_need(m, M_RECORD))
5014     return &m->record.guid;
5015 }
5016 
message_get_userflags(message_t * m,uint32_t * flagsp)5017 EXPORTED int message_get_userflags(message_t *m, uint32_t *flagsp)
5018 {
5019     int r = message_need(m, M_RECORD);
5020     int i;
5021     if (r) return r;
5022     for (i = 0; i < MAX_USER_FLAGS/32; i++)
5023         flagsp[i] = m->record.user_flags[i];
5024     return 0;
5025 }
5026 
message_get_systemflags(message_t * m,uint32_t * flagsp)5027 EXPORTED int message_get_systemflags(message_t *m, uint32_t *flagsp)
5028 {
5029     int r = message_need(m, M_RECORD);
5030     if (r) return r;
5031     *flagsp = m->record.system_flags;
5032     return 0;
5033 }
5034 
message_get_internalflags(message_t * m,uint32_t * flagsp)5035 EXPORTED int message_get_internalflags(message_t *m, uint32_t *flagsp)
5036 {
5037     int r = message_need(m, M_RECORD);
5038     if (r) return r;
5039     *flagsp = m->record.internal_flags;
5040     return 0;
5041 }
5042 
message_get_indexflags(message_t * m,uint32_t * flagsp)5043 EXPORTED int message_get_indexflags(message_t *m, uint32_t *flagsp)
5044 {
5045     int r = message_need(m, M_INDEX);
5046     if (r) return r;
5047     *flagsp = m->indexflags;
5048     return 0;
5049 }
5050 
message_get_savedate(message_t * m,time_t * datep)5051 EXPORTED int message_get_savedate(message_t *m, time_t *datep)
5052 {
5053     int r = message_need(m, M_RECORD);
5054     if (r) return r;
5055     *datep = m->record.savedate;
5056     if (!*datep) *datep = m->record.internaldate;
5057     return 0;
5058 }
5059 
message_get_indexversion(message_t * m,uint32_t * versionp)5060 EXPORTED int message_get_indexversion(message_t *m, uint32_t *versionp)
5061 {
5062     int r = message_need(m, M_MAILBOX);
5063     if (r) return r;
5064     *versionp = m->mailbox->i.minor_version;
5065     return 0;
5066 }
5067 
message_get_sentdate(message_t * m,time_t * datep)5068 EXPORTED int message_get_sentdate(message_t *m, time_t *datep)
5069 {
5070     int r = message_need(m, M_RECORD);
5071     if (r) return r;
5072     *datep = m->record.sentdate;
5073     return 0;
5074 }
5075 
message_get_gmtime(message_t * m,time_t * tp)5076 EXPORTED int message_get_gmtime(message_t *m, time_t *tp)
5077 {
5078     int r = message_need(m, M_RECORD);
5079     if (r) return r;
5080     *tp = m->record.gmtime;
5081     return 0;
5082 }
5083 
message_get_internaldate(message_t * m,time_t * datep)5084 EXPORTED int message_get_internaldate(message_t *m, time_t *datep)
5085 {
5086     int r = message_need(m, M_RECORD);
5087     if (r) return r;
5088     *datep = m->record.internaldate;
5089     return 0;
5090 }
5091 
message_get_fname(message_t * m,const char ** fnamep)5092 EXPORTED int message_get_fname(message_t *m, const char **fnamep)
5093 {
5094     int r = message_need(m, M_FILENAME);
5095     if (r) return r;
5096     *fnamep = m->filename;
5097     return 0;
5098 }
5099 
5100 /* XXX despite the name, this actually gives back ALL the values of the
5101  * XXX named header, unless flags contains MESSAGE_LAST
5102  */
extract_one(struct buf * buf,const char * name,int flags,int has_name,int isutf8,struct buf * raw)5103 static void extract_one(struct buf *buf,
5104                         const char *name,
5105                         int flags,
5106                         int has_name,
5107                         int isutf8,
5108                         struct buf *raw)
5109 {
5110     char *p = NULL;
5111 
5112     if (raw->len && (flags & MESSAGE_LAST)) {
5113         /* Skip all but the last header value */
5114         const char *q = raw->s;
5115         const char *last = raw->s;
5116         while ((p = strnchr(q, '\r', raw->s + raw->len - q))) {
5117             if (p >= raw->s + raw->len - 2)
5118                 break;
5119             if (*(p+1) == '\n' && *(p+2) && !isspace(*(p+2)))
5120                 last = p + 2;
5121             q = p + 1;
5122         }
5123         if (last != raw->s)
5124             buf_remove(raw, 0, last - raw->s);
5125         p = NULL;
5126     }
5127 
5128     if (has_name && !(flags & MESSAGE_FIELDNAME)) {
5129         /* remove the fieldname and colon */
5130         int pos = buf_findchar(raw, 0, ':');
5131         assert(pos > 0);
5132         buf_remove(raw, 0, pos+1);
5133     }
5134     else if (!has_name && (flags & MESSAGE_FIELDNAME)) {
5135         /* insert a fieldname and colon */
5136         buf_insertcstr(raw, 0, ":");
5137         buf_insertcstr(raw, 0, name);
5138     }
5139 
5140     switch (flags & _MESSAGE_FORMAT_MASK) {
5141     case MESSAGE_RAW:
5142         /* Logically, we're appending to the resulting buffer.
5143          * However if the buf is empty we can save a memory copy
5144          * by setting it up as a CoW buffer.  This means that
5145          * the caller will need to call buf_cstring() if they
5146          * need a C string. */
5147         if (!raw->alloc)
5148             buf_cowappendmap(buf, raw->s, raw->len);
5149         else
5150             buf_append(buf, raw);
5151         break;
5152     case MESSAGE_DECODED:
5153         /* XXX - this is also broken with utf8ness, but the only caller protects agains the fields
5154          * that could be utf8 (search_header) - so it doesn't matter */
5155         p = charset_parse_mimeheader(buf_cstring(raw), charset_flags);
5156         buf_appendcstr(buf, p);
5157         break;
5158     case MESSAGE_SNIPPET:
5159         if (isutf8) {
5160             charset_t utf8 = charset_lookupname("utf-8");
5161             p = charset_convert(buf_cstring(raw), utf8, charset_snippet_flags);
5162             charset_free(&utf8);
5163         }
5164         else {
5165             p = charset_decode_mimeheader(buf_cstring(raw), charset_snippet_flags);
5166         }
5167         buf_appendcstr(buf, p);
5168         break;
5169     case MESSAGE_SEARCH:
5170         /* TODO: need a variant of decode_mimeheader() which
5171          * takes two struct buf* and a search flag */
5172         if (isutf8) {
5173             charset_t utf8 = charset_lookupname("utf-8");
5174             p = charset_convert(buf_cstring(raw), utf8, charset_flags);
5175             charset_free(&utf8);
5176         }
5177         else {
5178             p = charset_decode_mimeheader(buf_cstring(raw), charset_flags);
5179         }
5180         buf_appendcstr(buf, p);
5181         break;
5182     }
5183 
5184     if (flags & MESSAGE_TRIM)
5185         buf_trim(buf);
5186 
5187     free(p);
5188 }
5189 
message_get_spamscore(message_t * m,uint32_t * valp)5190 EXPORTED int message_get_spamscore(message_t *m, uint32_t *valp)
5191 {
5192     struct buf buf = BUF_INITIALIZER;
5193     int r = message_get_field(m, "X-Spam-score", MESSAGE_RAW, &buf);
5194     *valp = r ? 0 : (int)((atof(buf_cstring(&buf)) * 100)  + 0.5);
5195     buf_free(&buf);
5196     return r;
5197 }
5198 
message_get_field(message_t * m,const char * hdr,int flags,struct buf * buf)5199 EXPORTED int message_get_field(message_t *m, const char *hdr, int flags, struct buf *buf)
5200 {
5201     strarray_t want = STRARRAY_INITIALIZER;
5202     struct buf raw = BUF_INITIALIZER;
5203     int hasname = 1;
5204     int isutf8 = 0;
5205 
5206     if (!strcasecmp(hdr, "rawheaders")) {
5207         int r = message_need(m, M_MAP|M_RECORD);
5208         if (r) return r;
5209         buf_setmap(buf, m->map.s, m->record.header_size);
5210         return 0;
5211     }
5212 
5213     if (!strcasecmp(hdr, "rawbody")) {
5214         int r = message_need(m, M_MAP|M_RECORD);
5215         if (r) return r;
5216         buf_setmap(buf, m->map.s + m->record.header_size, m->record.size - m->record.header_size);
5217         return 0;
5218     }
5219 
5220     if (!(flags & MESSAGE_APPEND))
5221         buf_reset(buf);
5222 
5223     /* Attempt to read field from the least-cost source available */
5224     int found_field = 0;
5225 
5226     /* the 5 standalone cache fields */
5227     if (!strcasecmp(hdr, "from")) {
5228         int r = message_need(m, M_CACHE);
5229         if (!r) {
5230             buf_setmap(&raw, cacheitem_base(&m->record, CACHE_FROM),
5231                     cacheitem_size(&m->record, CACHE_FROM));
5232             if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5233                 buf_reset(&raw);
5234             hasname = 0;
5235             isutf8 = 1;
5236             found_field = 1;
5237         } else if (r != IMAP_NOTFOUND) return r;
5238     }
5239     else if (!strcasecmp(hdr, "to")) {
5240         int r = message_need(m, M_CACHE);
5241         if (!r) {
5242             buf_setmap(&raw, cacheitem_base(&m->record, CACHE_TO),
5243                     cacheitem_size(&m->record, CACHE_TO));
5244             if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5245                 buf_reset(&raw);
5246             hasname = 0;
5247             isutf8 = 1;
5248             found_field = 1;
5249         } else if (r != IMAP_NOTFOUND) return r;
5250     }
5251     else if (!strcasecmp(hdr, "cc")) {
5252         int r = message_need(m, M_CACHE);
5253         if (!r) {
5254             buf_setmap(&raw, cacheitem_base(&m->record, CACHE_CC),
5255                     cacheitem_size(&m->record, CACHE_CC));
5256             if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5257                 buf_reset(&raw);
5258             hasname = 0;
5259             isutf8 = 1;
5260             found_field = 1;
5261         } else if (r != IMAP_NOTFOUND) return r;
5262     }
5263     else if (!strcasecmp(hdr, "bcc")) {
5264         int r = message_need(m, M_CACHE);
5265         if (!r) {
5266             buf_setmap(&raw, cacheitem_base(&m->record, CACHE_BCC),
5267                     cacheitem_size(&m->record, CACHE_BCC));
5268             if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5269                 buf_reset(&raw);
5270             hasname = 0;
5271             isutf8 = 1;
5272             found_field = 1;
5273         } else if (r != IMAP_NOTFOUND) return r;
5274     }
5275     else if (!strcasecmp(hdr, "subject")) {
5276         int r = message_need(m, M_CACHE);
5277         if (!r) {
5278             message1_get_subject(&m->record, &raw);
5279             hasname = 0;
5280             isutf8 = 1;
5281             found_field = 1;
5282         } else if (r != IMAP_NOTFOUND) return r;
5283     }
5284 
5285     /* message-id is from the envelope */
5286     else if (!strcasecmp(hdr, "message-id")) {
5287         char *envtokens[NUMENVTOKENS];
5288         char *c_env;
5289         int r = message_need(m, M_CACHE);
5290         if (!r) {
5291             c_env = xstrndup(cacheitem_base(&m->record, CACHE_ENVELOPE) + 1,
5292                     cacheitem_size(&m->record, CACHE_ENVELOPE) - 2);
5293             parse_cached_envelope(c_env, envtokens, NUMENVTOKENS);
5294             if (envtokens[ENV_MSGID])
5295                 buf_appendcstr(&raw, envtokens[ENV_MSGID]);
5296             free(c_env);
5297             if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5298                 buf_reset(&raw);
5299             hasname = 0;
5300             found_field = 1;
5301         } else if (r != IMAP_NOTFOUND) return r;
5302     }
5303     else {
5304         int r = message_need(m, M_RECORD);
5305         unsigned cache_version = mailbox_cached_header(hdr);
5306         if (!r && m->record.cache_version >= cache_version) {
5307             /* it's in the cache */
5308             char *headers = NULL;
5309             int r = message_need(m, M_CACHE);
5310             if (r) return r;
5311             headers = xstrndup(cacheitem_base(&m->record, CACHE_HEADERS),
5312                       cacheitem_size(&m->record, CACHE_HEADERS));
5313             strarray_append(&want, hdr);
5314             message_pruneheader(headers, &want, NULL);
5315             buf_appendcstr(&raw, headers);
5316             free(headers);
5317             hasname = 1;
5318             found_field = 1;
5319         } else if (r && r != IMAP_NOTFOUND) return r;
5320     }
5321 
5322     if (!found_field) {
5323         /* fall back to read field from raw headers */
5324         char *headers = NULL;
5325         int r = message_need(m, M_MAP|M_CACHEBODY);
5326         if (r) return r;
5327         headers = xstrndup(m->map.s + m->body->header_offset, m->body->header_size);
5328         strarray_append(&want, hdr);
5329         message_pruneheader(headers, &want, NULL);
5330         buf_appendcstr(&raw, headers);
5331         free(headers);
5332         hasname = 1;
5333         found_field = 1;
5334     }
5335 
5336     if (raw.len)
5337         extract_one(buf, hdr, flags, hasname, isutf8, &raw);
5338 
5339     buf_free(&raw);
5340     strarray_fini(&want);
5341 
5342     return 0;
5343 }
5344 
message_foreach_header(const char * headers,size_t len,int (* cb)(const char *,const char *,void *),void * rock)5345 EXPORTED int message_foreach_header(const char *headers, size_t len,
5346                                     int(*cb)(const char*, const char*, void*),
5347                                     void *rock)
5348 {
5349     struct buf key = BUF_INITIALIZER;
5350     struct buf val = BUF_INITIALIZER;
5351     const char *top = headers + len;
5352     const char *hdr = headers;
5353     int r = 0;
5354 
5355     while (hdr < top) {
5356         /* Look for colon separating header name from value */
5357         const char *p = memchr(hdr, ':', top - hdr);
5358         if (!p) {
5359             r = IMAP_INTERNAL;
5360             goto done;
5361         }
5362         buf_setmap(&key, hdr, p - hdr);
5363         p++;
5364         /* Extract raw header value, skipping over folding CRLF */
5365         const char *q = p;
5366         while (q < top && (q = memchr(q, '\n', top - q))) {
5367             if ((++q == top) || (*q != ' ' && *q != '\t'))
5368                 break;
5369         }
5370         if (!q) q = top;
5371         /* Chomp of trailing CRLF */
5372         buf_setmap(&val, p, q - p >= 2 ? q - p - 2 : 0);
5373         /* Call callback for header */
5374         r = cb(buf_cstring(&key), buf_cstring(&val), rock);
5375         if (r) break;
5376         /* Prepare next iteration */
5377         buf_reset(&key);
5378         buf_reset(&val);
5379         hdr = q;
5380     }
5381 
5382 done:
5383     buf_free(&key);
5384     buf_free(&val);
5385     return r;
5386 }
5387 
message_get_type(message_t * m,const char ** strp)5388 EXPORTED int message_get_type(message_t *m, const char **strp)
5389 {
5390     int r = message_need(m, M_CACHEBODY);
5391     if (r) return r;
5392     *strp = m->body->type;
5393     return 0;
5394 }
5395 
message_get_subtype(message_t * m,const char ** strp)5396 EXPORTED int message_get_subtype(message_t *m, const char **strp)
5397 {
5398     int r = message_need(m, M_CACHEBODY);
5399     if (r) return r;
5400     *strp = m->body->subtype;
5401     return 0;
5402 }
5403 
message_get_encoding(message_t * m,int * encp)5404 EXPORTED int message_get_encoding(message_t *m, int *encp)
5405 {
5406     int r = message_need(m, M_CACHEBODY);
5407     if (r) return r;
5408     *encp = m->body->charset_enc;
5409     return 0;
5410 }
5411 
message_get_charset_id(message_t * m,const char ** strp)5412 EXPORTED int message_get_charset_id(message_t *m, const char **strp)
5413 {
5414     int r = message_need(m, M_CACHEBODY);
5415     if (r) return r;
5416     *strp = m->body->charset_id;
5417     return 0;
5418 }
5419 
message_get_cachebody(message_t * m,const struct body ** bodyp)5420 EXPORTED int message_get_cachebody(message_t *m, const struct body **bodyp)
5421 {
5422     int r = message_need(m, M_CACHEBODY);
5423     if (r) return r;
5424     *bodyp = m->body;
5425     return 0;
5426 }
5427 
message_get_body(message_t * m,struct buf * buf)5428 EXPORTED int message_get_body(message_t *m, struct buf *buf)
5429 {
5430     return message_get_field(m, "rawbody", MESSAGE_RAW, buf);
5431 }
5432 
message_get_headers(message_t * m,struct buf * buf)5433 EXPORTED int message_get_headers(message_t *m, struct buf *buf)
5434 {
5435     return message_get_field(m, "rawheaders", MESSAGE_RAW, buf);
5436 }
5437