1 /* message.c -- Message manipulation/parsing
2 *
3 * Copyright (c) 1994-2008 Carnegie Mellon University. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. The name "Carnegie Mellon University" must not be used to
18 * endorse or promote products derived from this software without
19 * prior written permission. For permission or any legal
20 * details, please contact
21 * Carnegie Mellon University
22 * Center for Technology Transfer and Enterprise Creation
23 * 4615 Forbes Avenue
24 * Suite 302
25 * Pittsburgh, PA 15213
26 * (412) 268-7393, fax: (412) 268-7395
27 * innovation@andrew.cmu.edu
28 *
29 * 4. Redistributions of any form whatsoever must retain the following
30 * acknowledgment:
31 * "This product includes software developed by Computing Services
32 * at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33 *
34 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41 */
42
43 #include <config.h>
44
45 #ifdef HAVE_UNISTD_H
46 #include <unistd.h>
47 #endif
48 #include <errno.h>
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <string.h>
52 #include <sysexits.h>
53 #include <syslog.h>
54 #include <sys/types.h>
55 #include <sys/uio.h>
56 #include <sys/stat.h>
57 #include <netinet/in.h>
58 #include <stdlib.h>
59
60 #include "arrayu64.h"
61 #include "assert.h"
62 #include "crc32.h"
63 #include "dlist.h"
64 #include "prot.h"
65 #include "hash.h"
66 #include "map.h"
67 #include "mailbox.h"
68 #include "message.h"
69 #include "message_priv.h"
70 #include "message_guid.h"
71 #include "parseaddr.h"
72 #include "charset.h"
73 #include "stristr.h"
74 #include "user.h"
75 #include "util.h"
76 #include "xmalloc.h"
77 #include "xstrlcpy.h"
78 #include "strarray.h"
79 #include "ptrarray.h"
80 #include "global.h"
81 #include "retry.h"
82 #include "rfc822tok.h"
83 #include "times.h"
84 #include "xstrnchr.h"
85
86 /* generated headers are not necessarily in current directory */
87 #include "imap/imap_err.h"
88 #include "imap/rfc822_header.h"
89
90 static int message_map_file(message_t *m, const char *fname);
91 static int message_parse_cbodystructure(message_t *m);
92
93 #define DEBUG 0
94
95 /* Message being parsed */
struct msg {
    const char *base;       /* first byte of the message data */
    unsigned long len;      /* length of the message data in bytes */
    unsigned long offset;   /* current parse position, counted from 'base';
                             * starts at 0 */
    int encode;             /* nonzero when binary parts are to be encoded
                             * in place: set to 1 by
                             * message_parse_binary_file() and to 0 by
                             * message_parse_mapped() */
};
102
103 #define MAX_FIELDNAME_LENGTH 256
104
105 /* Default MIME Content-type */
106 #define DEFAULT_CONTENT_TYPE "TEXT/PLAIN; CHARSET=us-ascii"
107
108 static int message_parse_body(struct msg *msg,
109 struct body *body,
110 const char *defaultContentType,
111 strarray_t *boundaries,
112 const char *efname);
113 static int message_parse_headers(struct msg *msg,
114 struct body *body,
115 const char *defaultContentType,
116 strarray_t *boundaries,
117 const char *efname);
118
119 static void message_parse_address(const char *hdr, struct address **addrp);
120 static void message_parse_encoding(const char *hdr, char **hdrp);
121 static void message_parse_charset(const struct body *body,
122 int *encoding, charset_t *charset);
123 static void message_parse_header(const char *hdr, struct buf *buf);
124 static void message_parse_bodytype(const char *hdr, struct body *body);
125 static void message_parse_bodydisposition(const char *hdr, struct body *body);
126 static void message_parse_params(const char *hdr, struct param **paramp);
127 static void message_fold_params(struct param **paramp);
128 static void message_parse_language(const char *hdr, struct param **paramp);
129 static void message_parse_rfc822space(const char **s);
130 static void message_parse_received_date(const char *hdr, char **hdrp);
131
132 static void message_parse_multipart(struct msg *msg,
133 struct body *body,
134 strarray_t *boundaries,
135 const char *efname);
136 static void message_parse_content(struct msg *msg,
137 struct body *body,
138 strarray_t *boundaries,
139 const char *efname);
140
141 static char *message_getline(struct buf *, struct msg *msg);
142 static int message_pendingboundary(const char *s, int slen, strarray_t *);
143
144 static void message_write_envelope(struct buf *buf, const struct body *body);
145 static void message_write_address(struct buf *buf,
146 const struct address *addrlist);
147 static void message_write_text_lcase(struct buf *buf, const char *s);
148 static void message_write_section(struct buf *buf, const struct body *body);
149 static void message_write_charset(struct buf *buf, const struct body *body);
150 static void message_write_searchaddr(struct buf *buf,
151 const struct address *addrlist);
152 static int message_need(const message_t *m, unsigned int need);
153 static void message_yield(message_t *m, unsigned int yield);
154
155 /*
156 * Convert a string to uppercase. Returns the string.
157 *
158 * This differs from the ucase() function in lib/util.c by using the
159 * libc toupper() instead of our hardcoded builtin lookup table.
160 * Whether this is a good thing is unclear, but that's what the old code
161 * did so I'm going to preserve it - gnb
162 */
/*
 * Convert the NUL-terminated string 's' to uppercase, in place.
 *
 * Classification and conversion are done by the libc <ctype.h> routines.
 * Each byte is converted to unsigned char before being handed to them:
 * passing a negative plain-char value to islower()/toupper() is undefined
 * behaviour.
 *
 * Returns 's'.
 */
static char *message_ucase(char *s)
{
    char *p;

    for (p = s; *p; p++) {
        unsigned char c = (unsigned char) *p;

        if (islower(c))
            *p = (char) toupper(c);
    }
    return s;
}
172
173 /*
174 * Check a message 'from' of 'size' bytes for minimal RFC 822 compliance.
175 * The message is read from 'from'. If 'to' is not NULL, the message
176 * is copied to 'to', otherwise an in-memory buffer of 'from' is checked.
177 *
178 * Caller must have initialized config_* routines (with cyrus_init) to read
179 * imapd.conf before calling.
180 */
message_copy_strict(struct protstream * from,FILE * to,unsigned size,int allow_null)181 EXPORTED int message_copy_strict(struct protstream *from, FILE *to,
182 unsigned size, int allow_null)
183 {
184 char buf[4096+1];
185 unsigned char *p, *endp;
186 int r = 0;
187 size_t n;
188 int sawcr = 0, sawnl;
189 int reject8bit = config_getswitch(IMAPOPT_REJECT8BIT);
190 int munge8bit = config_getswitch(IMAPOPT_MUNGE8BIT);
191 int inheader = 1, blankline = 1;
192 struct buf tmp = BUF_INITIALIZER;
193
194 while (size) {
195 n = prot_read(from, buf, size > 4096 ? 4096 : size);
196 if (!n) {
197 syslog(LOG_ERR, "IOERROR: reading message: unexpected end of file");
198 return IMAP_IOERROR;
199 }
200
201 buf[n] = '\0';
202
203 /* Quick check for NUL in entire buffer, if we're not allowing it */
204 if (!allow_null && (n != strlen(buf))) {
205 r = IMAP_MESSAGE_CONTAINSNULL;
206 }
207
208 size -= n;
209 if (r) continue;
210
211 for (p = (unsigned char *)buf, endp = p + n; p < endp; p++) {
212 if (!*p && inheader) {
213 /* NUL in header is always bad */
214 r = IMAP_MESSAGE_CONTAINSNULL;
215 }
216 else if (*p == '\n') {
217 if (!sawcr && (inheader || !allow_null))
218 r = IMAP_MESSAGE_CONTAINSNL;
219 sawcr = 0;
220 if (blankline) {
221 inheader = 0;
222 }
223 blankline = 1;
224 }
225 else if (*p == '\r') {
226 sawcr = 1;
227 }
228 else {
229 sawcr = 0;
230 blankline = 0;
231 if (inheader && *p >= 0x80) {
232 if (reject8bit) {
233 /* We have been configured to reject all mail of this
234 form. */
235 if (!r) r = IMAP_MESSAGE_CONTAINS8BIT;
236 } else if (munge8bit) {
237 /* We have been configured to munge all mail of this
238 form. */
239 *p = 'X';
240 }
241 }
242 }
243 }
244
245 if (to)
246 fwrite(buf, 1, n, to);
247 else
248 buf_appendmap(&tmp, buf, n);
249 }
250
251 if (r) goto done;
252
253 if (to) {
254 fflush(to);
255 if (ferror(to) || fsync(fileno(to))) {
256 syslog(LOG_ERR, "IOERROR: writing message: %m");
257 r = IMAP_IOERROR;
258 goto done;
259 }
260 rewind(to);
261 }
262
263 /* Go back and check headers */
264 sawnl = 1;
265 const char *cur = buf_base(&tmp);
266 const char *top = buf_base(&tmp) + buf_len(&tmp);
267 for (;;) {
268 /* Read headers into buffer */
269 if (to) {
270 if (!fgets(buf, sizeof(buf), to)) {
271 r = sawnl ? 0 : IMAP_MESSAGE_BADHEADER;
272 goto done;
273 }
274 }
275 else {
276 if (cur >= top) {
277 r = sawnl ? 0 : IMAP_MESSAGE_BADHEADER;
278 goto done;
279 }
280 const char *q = strchr(cur, '\n');
281 if (q == NULL) {
282 q = cur + sizeof(buf);
283 if (q > top) q = top;
284 }
285 else {
286 q++;
287 }
288 if (q > cur + sizeof(buf) - 1) {
289 q = cur + sizeof(buf) - 1;
290 }
291 memcpy(buf, cur, q - cur);
292 buf[q-cur] = '\0';
293 cur = q;
294 }
295
296 /* End of header section */
297 if (sawnl && buf[0] == '\r') {
298 r = 0;
299 goto done;
300 }
301
302 /* Check for valid header name */
303 if (sawnl && buf[0] != ' ' && buf[0] != '\t') {
304 if (buf[0] == ':') {
305 r = IMAP_MESSAGE_BADHEADER;
306 goto done;
307 }
308 if (strstr(buf, "From ") != buf) {
309 for (p = (unsigned char *)buf; *p && *p != ':'; p++) {
310 if (*p <= ' ') {
311 r = IMAP_MESSAGE_BADHEADER;
312 goto done;
313 }
314 }
315 }
316 }
317
318 /* Used to be some 8bit checks here but those were moved above so that
319 we could do something other than refuse the message.
320 Unfortunately, we still need to look for the end of the string. */
321 for(p = (unsigned char*) buf; *p; p++);
322
323 sawnl = (p > (unsigned char *)buf) && (p[-1] == '\n');
324 }
325 done:
326 buf_free(&tmp);
327 return r;
328 }
329
message_parse(const char * fname,struct index_record * record)330 EXPORTED int message_parse(const char *fname, struct index_record *record)
331 {
332 struct body *body = NULL;
333 FILE *f;
334 int r;
335
336 f = fopen(fname, "r");
337 if (!f) return IMAP_IOERROR;
338
339 r = message_parse_file(f, NULL, NULL, &body, fname);
340 if (!r) r = message_create_record(record, body);
341
342 fclose(f);
343
344 if (body) {
345 message_free_body(body);
346 free(body);
347 }
348
349 return r;
350 }
351
352 /*
353 * Parse the message 'infile'.
354 *
355 * The caller MUST free the allocated body struct.
356 *
357 * If msg_base/msg_len are non-NULL, the file will remain memory-mapped
358 * and returned to the caller. The caller MUST unmap the file.
359 */
message_parse_file(FILE * infile,const char ** msg_base,size_t * msg_len,struct body ** body,const char * efname)360 EXPORTED int message_parse_file(FILE *infile,
361 const char **msg_base, size_t *msg_len,
362 struct body **body,
363 const char *efname)
364 {
365 int fd = fileno(infile);
366 struct stat sbuf;
367 const char *tmp_base;
368 size_t tmp_len;
369 int unmap = 0, r;
370
371 if (!msg_base) {
372 unmap = 1;
373 msg_base = &tmp_base;
374 msg_len = &tmp_len;
375 }
376 *msg_base = NULL;
377 *msg_len = 0;
378
379 if (fstat(fd, &sbuf) == -1) {
380 if (efname)
381 syslog(LOG_ERR, "IOERROR: fstat on new message in spool (%s): %m",
382 efname);
383 else
384 syslog(LOG_ERR, "IOERROR: fstat on new message in spool: %m");
385 fatal("can't fstat message file", EX_OSFILE);
386 }
387 map_refresh(fd, 1, msg_base, msg_len, sbuf.st_size,
388 "new message", 0);
389
390 if (!*msg_base || !*msg_len)
391 return IMAP_IOERROR; /* zero length file? */
392
393 if (!*body) *body = (struct body *) xzmalloc(sizeof(struct body));
394 r = message_parse_mapped(*msg_base, *msg_len, *body, efname);
395
396 if (unmap) map_free(msg_base, msg_len);
397
398 return r;
399 }
400
401 /*
402 * Parse the message 'infile'.
403 *
404 * The caller MUST free the allocated body struct.
405 *
406 * Any previous contents of 'buf' are released, then the file is mapped
407 * into 'buf' and left there on return. The caller MUST buf_free() it.
408 */
message_parse_file_buf(FILE * infile,struct buf * buf,struct body ** body,const char * efname)409 EXPORTED int message_parse_file_buf(FILE *infile,
410 struct buf *buf,
411 struct body **body,
412 const char *efname)
413 {
414 int fd = fileno(infile);
415 struct stat sbuf;
416
417 // unmap or clear space
418 buf_free(buf);
419
420 if (fstat(fd, &sbuf) == -1) {
421 if (efname)
422 syslog(LOG_ERR, "IOERROR: fstat on new message in spool (%s): %m",
423 efname);
424 else
425 syslog(LOG_ERR, "IOERROR: fstat on new message in spool: %m");
426 fatal("can't fstat message file", EX_OSFILE);
427 }
428 buf_refresh_mmap(buf, 1, fd, efname, sbuf.st_size, "new message");
429
430 if (!*body) *body = (struct body *) xzmalloc(sizeof(struct body));
431 return message_parse_mapped(buf_base(buf), buf_len(buf), *body, efname);
432 }
433
434
435 /*
436 * Parse the message 'infile'.
437 *
438 * The caller MUST free the allocated body struct.
439 *
440 * This function differs from message_parse_file() in that we create a
441 * writable buffer rather than memory-mapping the file, so that binary
442 * data can be encoded into the buffer. The file is rewritten upon
443 * completion.
444 *
445 * XXX can we do this with mmap()?
446 */
message_parse_binary_file(FILE * infile,struct body ** body,const char * efname)447 EXPORTED int message_parse_binary_file(FILE *infile, struct body **body,
448 const char *efname)
449 {
450 int fd = fileno(infile);
451 struct stat sbuf;
452 struct msg msg;
453 size_t n;
454
455 if (fstat(fd, &sbuf) == -1) {
456 if (efname)
457 syslog(LOG_ERR, "IOERROR: fstat on new message in spool (%s): %m",
458 efname);
459 else
460 syslog(LOG_ERR, "IOERROR: fstat on new message in spool: %m");
461 fatal("can't fstat message file", EX_OSFILE);
462 }
463 msg.len = sbuf.st_size;
464 msg.base = xmalloc(msg.len);
465 msg.offset = 0;
466 msg.encode = 1;
467
468 lseek(fd, 0L, SEEK_SET);
469
470 n = retry_read(fd, (char*) msg.base, msg.len);
471 if (n != msg.len) {
472 if (efname)
473 syslog(LOG_ERR, "IOERROR: reading binary file in spool (%s): %m",
474 efname);
475 else
476 syslog(LOG_ERR, "IOERROR: reading binary file in spool: %m");
477 return IMAP_IOERROR;
478 }
479
480 if (!*body) *body = (struct body *) xzmalloc(sizeof(struct body));
481 message_parse_body(&msg, *body,
482 DEFAULT_CONTENT_TYPE, NULL, efname);
483
484 (*body)->filesize = msg.len;
485
486 message_guid_generate(&(*body)->guid, msg.base, msg.len);
487
488 lseek(fd, 0L, SEEK_SET);
489 n = retry_write(fd, msg.base, msg.len);
490
491 free((char*) msg.base);
492
493 if (n != msg.len || fsync(fd)) {
494 if (efname)
495 syslog(LOG_ERR, "IOERROR: rewriting binary file in spool (%s): %m",
496 efname);
497 else
498 syslog(LOG_ERR, "IOERROR: rewriting binary file in spool: %m");
499 return IMAP_IOERROR;
500 }
501
502 return 0;
503 }
504
505 /*
506 * Parse the message at 'msg_base' of length 'msg_len'.
507 */
message_parse_mapped(const char * msg_base,unsigned long msg_len,struct body * body,const char * efname)508 EXPORTED int message_parse_mapped(const char *msg_base, unsigned long msg_len,
509 struct body *body, const char *efname)
510 {
511 struct msg msg;
512
513 msg.base = msg_base;
514 msg.len = msg_len;
515 msg.offset = 0;
516 msg.encode = 0;
517
518 message_parse_body(&msg, body, DEFAULT_CONTENT_TYPE, NULL, efname);
519
520 body->filesize = msg_len;
521
522 message_guid_generate(&body->guid, msg_base, msg_len);
523
524 if (body->filesize != body->header_size + body->content_size) {
525 if (efname)
526 syslog(LOG_NOTICE, "IOERROR: size mismatch on parse %s (%s) (%d, %d)",
527 message_guid_encode(&body->guid), efname,
528 (int)body->filesize,
529 (int)(body->header_size + body->content_size));
530 else
531 syslog(LOG_NOTICE, "IOERROR: size mismatch on parse %s (%d, %d)",
532 message_guid_encode(&body->guid), (int)body->filesize,
533 (int)(body->header_size + body->content_size));
534 }
535
536 return 0;
537 }
538
539 /*
540 * Prune the header section in buf to include only those headers
541 * listed in headers or (if headers_not is non-empty) those headers
542 * not in headers_not.
543 */
message_pruneheader(char * buf,const strarray_t * headers,const strarray_t * headers_not)544 HIDDEN void message_pruneheader(char *buf, const strarray_t *headers,
545 const strarray_t *headers_not)
546 {
547 char *p, *colon, *nextheader;
548 int goodheader;
549 char *endlastgood = buf;
550 char **l;
551 int count = 0;
552 int maxlines = config_getint(IMAPOPT_MAXHEADERLINES);
553
554 p = buf;
555 while (*p && *p != '\r') {
556 colon = strchr(p, ':');
557 if (colon && headers_not && headers_not->count) {
558 goodheader = 1;
559 for (l = headers_not->data ; *l ; l++) {
560 if ((size_t) (colon - p) == strlen(*l) &&
561 !strncasecmp(p, *l, colon - p)) {
562 goodheader = 0;
563 break;
564 }
565 }
566 } else {
567 goodheader = 0;
568 }
569 if (colon && headers && headers->count) {
570 for (l = headers->data ; *l ; l++) {
571 if ((size_t) (colon - p) == strlen(*l) &&
572 !strncasecmp(p, *l, colon - p)) {
573 goodheader = 1;
574 break;
575 }
576 }
577 }
578
579 nextheader = p;
580 do {
581 nextheader = strchr(nextheader, '\n');
582 if (nextheader) nextheader++;
583 else nextheader = p + strlen(p);
584 } while (*nextheader == ' ' || *nextheader == '\t');
585
586 if (goodheader) {
587 if (endlastgood != p) {
588 /* memmove and not strcpy since this is all within a
589 * single buffer */
590 memmove(endlastgood, p, strlen(p) + 1);
591 nextheader -= p - endlastgood;
592 }
593 endlastgood = nextheader;
594 }
595 p = nextheader;
596
597 /* stop giant headers causing massive loops */
598 if (maxlines) {
599 count++;
600 if (count > maxlines) break;
601 }
602 }
603
604 *endlastgood = '\0';
605 }
606
message_find_part(struct body * body,const char * section,const char ** content_types,const char * msg_base,unsigned long msg_len,struct bodypart *** parts,int * n)607 static void message_find_part(struct body *body, const char *section,
608 const char **content_types,
609 const char *msg_base, unsigned long msg_len,
610 struct bodypart ***parts, int *n)
611 {
612 int match;
613 const char **type;
614 char nextsection[128];
615
616 for (match = 0, type = content_types; !match && *type; type++) {
617 const char *subtype = strchr(*type, '/');
618 size_t tlen = subtype ? (size_t) (subtype++ - *type) : strlen(*type);
619
620 if ((!(*type)[0] || (tlen == strlen(body->type) &&
621 !strncasecmp(body->type, *type, tlen))) &&
622 (!subtype || !subtype[0] || !strcasecmp(body->subtype, subtype))) {
623 match = 1;
624 }
625 }
626
627 if (match) {
628 /* matching part, sanity check the size against the mmap'd file */
629 if (body->content_offset + body->content_size > msg_len) {
630 syslog(LOG_ERR, "IOERROR: body part exceeds size of message file");
631 fatal("body part exceeds size of message file", EX_OSFILE);
632 }
633
634 if (!body->decoded_body) {
635 int encoding;
636 charset_t charset = CHARSET_UNKNOWN_CHARSET;
637 message_parse_charset(body, &encoding, &charset);
638 if (charset == CHARSET_UNKNOWN_CHARSET)
639 /* try ASCII */
640 charset = charset_lookupname("us-ascii");
641 body->decoded_body = charset_to_utf8(
642 msg_base + body->content_offset, body->content_size,
643 charset, encoding); /* returns a cstring */
644 charset_free(&charset);
645 }
646
647 /* grow the array and add the new part */
648 *parts = xrealloc(*parts, (*n+2)*sizeof(struct bodypart *));
649 (*parts)[*n] = xzmalloc(sizeof(struct bodypart));
650 strlcpy((*parts)[*n]->section, section, sizeof((*parts)[*n]->section));
651 (*parts)[*n]->decoded_body = body->decoded_body;
652 (*parts)[++(*n)] = NULL;
653 }
654 else if (!strcmp(body->type, "MULTIPART")) {
655 int i;
656
657 for (i = 0; i < body->numparts; i++) {
658 snprintf(nextsection, sizeof(nextsection), "%s.%d", section, i+1);
659 message_find_part(&body->subpart[i], nextsection, content_types,
660 msg_base, msg_len, parts, n);
661 }
662 }
663 else if (!strcmp(body->type, "MESSAGE") &&
664 !strcmp(body->subtype, "RFC822")) {
665 snprintf(nextsection, sizeof(nextsection), "%s.1", section);
666 message_find_part(body->subpart, nextsection, content_types,
667 msg_base, msg_len, parts, n);
668 }
669 }
670
671 /*
672 * Fetch the bodypart(s) which match the given content_type and return
673 * them as an allocated array.
674 *
675 * The caller MUST free the array of allocated bodypart(s).
676 */
message_fetch_part(struct message_content * msg,const char ** content_types,struct bodypart *** parts)677 EXPORTED void message_fetch_part(struct message_content *msg,
678 const char **content_types,
679 struct bodypart ***parts)
680 {
681 int n = 0; /* running count of the number of matching parts */
682
683 *parts = NULL;
684 message_find_part(msg->body, "1", content_types,
685 buf_base(&msg->map), buf_len(&msg->map), parts, &n);
686 }
687
688 /*
689 * Appends the message's cache information to the cache file
690 * and fills in appropriate information in the index record pointed to
691 * by 'record'.
692 */
message_create_record(struct index_record * record,const struct body * body)693 HIDDEN int message_create_record(struct index_record *record,
694 const struct body *body)
695 {
696 /* used for sent time searching, truncated to day with no TZ */
697 if (time_from_rfc5322(body->date, &record->sentdate, DATETIME_DATE_ONLY) < 0)
698 record->sentdate = 0;
699
700 /* used for sent time sorting, full gmtime of Date: header */
701 if (time_from_rfc5322(body->date, &record->gmtime, DATETIME_FULL) < 0)
702 record->gmtime = 0;
703
704 record->size = body->filesize;
705 record->header_size = body->header_size;
706 message_guid_copy(&record->guid, &body->guid);
707
708 message_write_cache(record, body);
709
710 return 0;
711 }
712
713 static enum rfc822_header
message_header_lookup(const char * buf,const char ** valp)714 message_header_lookup(const char *buf, const char **valp)
715 {
716 unsigned int len = strcspn(buf, ":\r\n");
717 if (buf[len] != ':')
718 return RFC822_BAD;
719 if (valp)
720 *valp = buf+len+1;
721 return rfc822_header_from_string_len(buf, len);
722 }
723
724
body_add_content_guid(const char * base,struct body * body)725 static void body_add_content_guid(const char *base, struct body *body)
726 {
727 int encoding = ENCODING_NONE;
728 char *decbuf = NULL;
729 charset_t cs = NULL;
730 size_t len = body->content_size;
731 message_parse_charset(body, &encoding, &cs);
732 base = charset_decode_mimebody(base, len, encoding, &decbuf, &len);
733 if (base) {
734 message_guid_generate(&body->content_guid, base, len);
735 body->decoded_content_size = len;
736 }
737 else {
738 message_guid_set_null(&body->content_guid);
739 body->decoded_content_size = 0;
740 }
741 charset_free(&cs);
742 free(decbuf);
743 }
744
745
746 /*
747 * Parse a body-part
748 */
/*
 * Parse one body part starting at the current position of 'msg' into
 * 'body', which is cleared first.
 *
 * 'boundaries' is the list of enclosing multipart boundaries; NULL marks
 * the top-level part, which gets an empty list of its own and
 * preallocated space for cached headers.  'defaultContentType' applies
 * when the part has no Content-Type: header.  'efname' is passed through
 * to the header and content parsers.
 *
 * Always returns 0.
 */
static int message_parse_body(struct msg *msg, struct body *body,
                              const char *defaultContentType,
                              strarray_t *boundaries,
                              const char *efname)
{
    strarray_t own_boundaries = STRARRAY_INITIALIZER;
    struct param *param;
    int hit_boundary;

    memset(body, 0, sizeof(struct body));

    if (boundaries == NULL) {
        /* No passed-in boundary structure, use a new, empty one */
        boundaries = &own_boundaries;
        /* We're at top-level--preallocate space to store cached headers */
        buf_ensure(&body->cacheheaders, 1024);
    }

    hit_boundary = message_parse_headers(msg, body, defaultContentType,
                                         boundaries, efname);

    /* Charset id and encoding id are stored in the binary
     * bodystructure, but we don't have that one here. */
    for (param = body->params; param; param = param->next) {
        if (strcasecmp(param->attribute, "CHARSET") == 0) {
            body->charset_id = xstrdupnull(param->value);
            break;
        }
    }

    body->charset_enc = encoding_lookupname(body->encoding);

    /* Recurse according to type */
    if (!strcmp(body->type, "MULTIPART")) {
        if (!hit_boundary)
            message_parse_multipart(msg, body, boundaries, efname);
    }
    else if (!strcmp(body->type, "MESSAGE") &&
             !strcmp(body->subtype, "RFC822")) {
        /* where the embedded message starts */
        const char *embedded = msg->base + msg->offset;

        body->subpart = (struct body *)xzmalloc(sizeof(struct body));

        if (!hit_boundary) {
            struct body *sub = body->subpart;

            message_parse_body(msg, sub,
                               DEFAULT_CONTENT_TYPE, boundaries, efname);

            /* Calculate our size/lines information */
            body->content_size = sub->header_size + sub->content_size;
            body->content_lines = sub->header_lines + sub->content_lines;

            /* Move any enclosing boundary information up to our level */
            body->boundary_size = sub->boundary_size;
            body->boundary_lines = sub->boundary_lines;

            /* it's nice to have a GUID for the message/rfc822 itself */
            body_add_content_guid(embedded, body);
        }
        else {
            /* headers ran into a boundary: the embedded message is empty,
             * give it just the default type */
            memset(body->subpart, 0, sizeof(struct body));
            message_parse_bodytype(DEFAULT_CONTENT_TYPE, body->subpart);
        }
    }
    else if (!hit_boundary) {
        message_parse_content(msg, body, boundaries, efname);
    }

    /* Free up boundary storage if necessary */
    strarray_fini(&own_boundaries);

    return 0;
}
827
828 /*
829 * Parse the headers of a body-part
830 */
/*
 * Parse the headers of a body-part.
 *
 * Header lines are read from 'msg' up to the blank line that ends the
 * header section, or up to a pending boundary from 'boundaries'.  The
 * interesting headers are then parsed into the fields of 'body'.  For a
 * body that has cache space allocated, cacheable headers are also
 * appended to 'body->cacheheaders'.
 *
 * When IMAPOPT_MAXHEADERLINES is non-zero, header lines beyond that
 * number are still counted but no longer parsed or cached.
 *
 * 'defaultContentType' is applied when no Content-Type: header set a
 * type.  'efname', if given, is only used in log messages.
 *
 * Returns nonzero if the headers were ended by a boundary, 0 otherwise.
 */
static int message_parse_headers(struct msg *msg, struct body *body,
                                 const char *defaultContentType,
                                 strarray_t *boundaries,
                                 const char *efname)
{
    struct buf headers = BUF_INITIALIZER;
    char *next;
    int len;
    int sawboundary = 0;
    uint32_t maxlines = config_getint(IMAPOPT_MAXHEADERLINES);
    int have_max = 0;
    const char *value;

    body->header_offset = msg->offset;

    buf_putc(&headers, '\n');   /* Leading newline to prime the pump */

    /* Slurp up all of the headers into 'headers' */
    while ((next = message_getline(&headers, msg)) &&
           (next[-1] != '\n' ||
            (*next != '\r' || next[1] != '\n'))) {

        len = strlen(next);

        if (next[-1] == '\n' && *next == '-' &&
            message_pendingboundary(next, len, boundaries)) {
            body->boundary_size = len;
            body->boundary_lines++;
            if (next - 1 > headers.s) {
                /* not the very first line: the two bytes before the
                 * boundary are counted as part of it and cut off the
                 * headers */
                body->boundary_size += 2;
                body->boundary_lines++;
                next[-2] = '\0';
            }
            else {
                *next = '\0';
            }
            sawboundary = 1;
            break;
        }
    }

    body->content_offset = msg->offset;
    body->header_size = strlen(headers.s+1);

    /* Scan over the slurped-up headers for interesting header information */
    body->header_lines = -1;    /* Correct for leading newline */
    for (next = headers.s; *next; next++) {
        if (*next == '\n') {
            body->header_lines++;

            /* if we're skipping, skip now */
            if (have_max) continue;

            /* check if we've hit a limit and flag it */
            if (maxlines && body->header_lines > maxlines) {
                if (efname)
                    syslog(LOG_ERR, "ERROR: message (%s) has more than %d header lines "
                                    "not caching any more",
                           efname, maxlines);
                else
                    syslog(LOG_ERR, "ERROR: message has more than %d header lines "
                                    "not caching any more",
                           maxlines);
                have_max = 1;
                continue;
            }

            if (/* space preallocated, i.e. must be top-level body */
                body->cacheheaders.s &&
                /* this is not a continuation line */
                (next[1] != ' ') && (next[1] != '\t') &&
                /* this header is supposed to be cached */
                mailbox_cached_header_inline(next+1) != BIT32_MAX) {
                /* append to the headers cache */
                message_parse_header(next+1, &body->cacheheaders);
            }

            switch (message_header_lookup(next+1, &value)) {
            case RFC822_BCC:
                message_parse_address(value, &body->bcc);
                break;
            case RFC822_CC:
                message_parse_address(value, &body->cc);
                break;
            case RFC822_CONTENT_DESCRIPTION:
                message_parse_string(value, &body->description);
                break;
            case RFC822_CONTENT_DISPOSITION:
                message_parse_bodydisposition(value, body);
                break;
            case RFC822_CONTENT_ID:
                message_parse_string(value, &body->id);
                break;
            case RFC822_CONTENT_LANGUAGE:
                message_parse_language(value, &body->language);
                break;
            case RFC822_CONTENT_LOCATION:
                message_parse_string(value, &body->location);
                break;
            case RFC822_CONTENT_MD5:
                message_parse_string(value, &body->md5);
                break;
            case RFC822_CONTENT_TRANSFER_ENCODING:
                message_parse_encoding(value, &body->encoding);

                /* If we're encoding binary, replace "binary"
                   with "base64" in CTE header body */
                if (msg->encode &&
                    !strcmpsafe(body->encoding, "BINARY")) {
                    /* 'headers' carries one extra leading newline, so a
                     * position in it maps to the message at that index
                     * minus one.  Start the search at the header value
                     * itself so that "...Encoding:binary" (no space after
                     * the colon) is found as well. */
                    char *p = (char*)
                        stristr(msg->base + body->header_offset +
                                (value - headers.s) - 1,
                                "binary");
                    /* never write through a failed search */
                    if (p) memcpy(p, "base64", 6);
                }
                break;
            case RFC822_CONTENT_TYPE:
                message_parse_bodytype(value, body);
                break;
            case RFC822_DATE:
                message_parse_string(value, &body->date);
                break;
            case RFC822_FROM:
                message_parse_address(value, &body->from);
                break;
            case RFC822_IN_REPLY_TO:
                message_parse_string(value, &body->in_reply_to);
                break;
            case RFC822_MESSAGE_ID:
                message_parse_string(value, &body->message_id);
                break;
            case RFC822_REPLY_TO:
                message_parse_address(value, &body->reply_to);
                break;
            case RFC822_RECEIVED:
                message_parse_received_date(value, &body->received_date);
                break;
            case RFC822_REFERENCES:
                message_parse_string(value, &body->references);
                break;
            case RFC822_SUBJECT:
                message_parse_string(value, &body->subject);
                break;
            case RFC822_SENDER:
                message_parse_address(value, &body->sender);
                break;
            case RFC822_TO:
                message_parse_address(value, &body->to);
                break;
            case RFC822_X_DELIVEREDINTERNALDATE:
                /* Explicit x-deliveredinternaldate overrides received: headers */
                message_parse_string(value, &body->x_deliveredinternaldate);
                break;
            case RFC822_X_ME_MESSAGE_ID:
                message_parse_string(value, &body->x_me_message_id);
                break;
            default:
                break;
            } /* switch() */
        } /* if (*next == '\n') */
    }

    /* If didn't find Content-Type: header, use the passed-in default type */
    if (!body->type) {
        message_parse_bodytype(defaultContentType, body);
    }
    buf_free(&headers);
    return sawboundary;
}
1000
1001 /*
1002 * Parse a list of RFC 822 addresses from a header
1003 */
/*
 * Parse a list of RFC 822 addresses from the header value 'hdr' into
 * '*addrp', replacing any list already stored there.
 *
 * The value extends to the first newline that is not followed by a space
 * or tab (i.e. continuation lines are included).  While the list is being
 * parsed the value is NUL-terminated in place, before a CR if the line
 * ends in CRLF; the overwritten byte is restored afterwards.  'hdr' must
 * therefore point into writable memory even though it is declared const.
 */
static void message_parse_address(const char *hdr, struct address **addrp)
{
    char *hdrend, hdrendchar = '\0';

    /* If we saw this header already, discard the earlier value */
    if (*addrp) {
        parseaddr_free(*addrp);
        *addrp = NULL;
    }

    /* Find end of header.  The search starts at 'hdr' itself: starting
     * one byte further on would read past the terminator of an empty
     * value, and would step over a newline that is the very first byte
     * of the value. */
    hdrend = strchr(hdr, '\n');
    while (hdrend && (hdrend[1] == ' ' || hdrend[1] == '\t')) {
        hdrend = strchr(hdrend+1, '\n');
    }

    /* Put a NUL character at the end of header */
    /* gnb:TODO this is evil and should be stopped */
    if (hdrend) {
        if (hdrend > hdr && hdrend[-1] == '\r') hdrend--;
        hdrendchar = *hdrend;
        *hdrend = '\0';
    }

    parseaddr_list(hdr, addrp);

    /* Put character at end of header back */
    if (hdrend) *hdrend = hdrendchar;
}
1033
1034 /*
1035 * Parse a Content-Transfer-Encoding from a header.
1036 */
message_parse_encoding(const char * hdr,char ** hdrp)1037 static void message_parse_encoding(const char *hdr, char **hdrp)
1038 {
1039 int len;
1040 const char *p;
1041
1042 /* If we saw this header already, discard the earlier value */
1043 if (*hdrp) {
1044 free(*hdrp);
1045 *hdrp = NULL;
1046 }
1047
1048 /* Skip leading whitespace, ignore header if blank */
1049 message_parse_rfc822space(&hdr);
1050 if (!hdr) return;
1051
1052 /* Find end of encoding token */
1053 for (p = hdr; *p && !Uisspace(*p) && *p != '('; p++) {
1054 if (*p < ' ' || strchr(MIME_TSPECIALS, *p)) return;
1055 }
1056 len = p - hdr;
1057
1058 /* Skip trailing whitespace, ignore header if trailing garbage */
1059 message_parse_rfc822space(&p);
1060 if (p) return;
1061
1062 /* Save encoding token */
1063 *hdrp = message_ucase(xstrndup(hdr, len));
1064 }
1065
1066 /*
1067 * parse a charset and encoding out of a body structure
1068 */
message_parse_charset(const struct body * body,int * e_ptr,charset_t * c_ptr)1069 static void message_parse_charset(const struct body *body,
1070 int *e_ptr, charset_t *c_ptr)
1071 {
1072
1073 int encoding = ENCODING_NONE;
1074 charset_t charset = charset_lookupname("us-ascii");
1075 struct param *param;
1076
1077
1078 if (body->encoding) {
1079 switch (body->encoding[0]) {
1080 case '7':
1081 case '8':
1082 if (!strcmp(body->encoding+1, "BIT"))
1083 encoding = ENCODING_NONE;
1084 else
1085 encoding = ENCODING_UNKNOWN;
1086 break;
1087
1088 case 'B':
1089 if (!strcmp(body->encoding, "BASE64"))
1090 encoding = ENCODING_BASE64;
1091 else if (!strcmp(body->encoding, "BINARY"))
1092 encoding = ENCODING_NONE;
1093 else
1094 encoding = ENCODING_UNKNOWN;
1095 break;
1096
1097 case 'Q':
1098 if (!strcmp(body->encoding, "QUOTED-PRINTABLE"))
1099 encoding = ENCODING_QP;
1100 else
1101 encoding = ENCODING_UNKNOWN;
1102 break;
1103
1104 default:
1105 encoding = ENCODING_UNKNOWN;
1106 }
1107 }
1108
1109 if (!body->type || !strcmp(body->type, "TEXT")) {
1110 for (param = body->params; param; param = param->next) {
1111 if (!strcasecmp(param->attribute, "charset")) {
1112 if (param->value && *param->value) {
1113 charset_free(&charset);
1114 charset = charset_lookupname(param->value);
1115 if (charset == CHARSET_UNKNOWN_CHARSET)
1116 syslog(LOG_NOTICE, "message_parse_charset: unknown charset %s for text/%s", param->value, body->subtype);
1117 }
1118 break;
1119 }
1120 }
1121 }
1122 else if (!strcmp(body->type, "MESSAGE")) {
1123 if (!strcmp(body->subtype, "RFC822")) {
1124 charset_free(&charset);
1125 charset = CHARSET_UNKNOWN_CHARSET;
1126 }
1127 encoding = ENCODING_NONE;
1128 }
1129 else {
1130 charset_free(&charset);
1131 charset = CHARSET_UNKNOWN_CHARSET;
1132 }
1133
1134 if (e_ptr) *e_ptr = encoding;
1135 if (c_ptr) *c_ptr = charset;
1136 else charset_free(&charset);
1137 }
1138
1139 /*
1140 * Parse an uninterpreted header
1141 */
message_parse_string(const char * hdr,char ** hdrp)1142 EXPORTED void message_parse_string(const char *hdr, char **hdrp)
1143 {
1144 const char *hdrend;
1145 char *he;
1146
1147 /* If we saw this header already, discard the earlier value */
1148 if (*hdrp) {
1149 free(*hdrp);
1150 *hdrp = NULL;
1151 }
1152
1153 /* Skip initial whitespace */
1154 while (*hdr == ' ' || *hdr == '\t') hdr++;
1155
1156 /* Find end of header */
1157 hdrend = hdr;
1158 do {
1159 hdrend = strchr(hdrend+1, '\n');
1160 } while (hdrend && (hdrend[1] == ' ' || hdrend[1] == '\t'));
1161 if (hdrend) {
1162 if (hdrend > hdr && hdrend[-1] == '\r') hdrend--;
1163 }
1164 else {
1165 hdrend = hdr + strlen(hdr);
1166 }
1167
1168 /* Save header value */
1169 *hdrp = xstrndup(hdr, (hdrend - hdr));
1170
1171 /* Un-fold header (overlapping buffers, use memmove) */
1172 he = *hdrp;
1173 while ((he = strchr(he, '\n'))!=NULL) {
1174 if (he > *hdrp && he[-1] == '\r') {
1175 he--;
1176 memmove(he, he+2, strlen(he+2)+1);
1177 }
1178 else {
1179 memmove(he, he+1, strlen(he+1)+1);
1180 }
1181 }
1182 }
1183
/*
 * Cache a header.
 *
 * Appends the text starting at 'hdr' to 'buf', up to the first newline
 * that is not followed by a space or tab (or to the terminating NUL),
 * excluding a CR directly before that newline.  Folded continuation lines
 * are copied as they are.  A CRLF is then appended to 'buf'.
 */
static void
message_parse_header(const char *hdr, struct buf *buf)
{
    int len;
    const char *hdrend;

    /* Find end of header.  The search starts at 'hdr' itself (not hdr+1)
     * so that a leading '\n' is recognised and an empty string is never
     * read past its terminator. */
    hdrend = hdr;
    while ((hdrend = strchr(hdrend, '\n')) != NULL &&
           (hdrend[1] == ' ' || hdrend[1] == '\t')) {
        hdrend++;
    }
    if (hdrend) {
        if (hdrend > hdr && hdrend[-1] == '\r') hdrend--;
    }
    else {
        hdrend = hdr + strlen(hdr);
    }

    /* Save header value, always terminated by CRLF */
    len = hdrend - hdr;
    buf_appendmap(buf, hdr, len);
    buf_putc(buf, '\r');
    buf_putc(buf, '\n');
}
1211
1212 /*
1213 * Parse a Content-Type from a header.
1214 */
message_parse_type(const char * hdr,char ** typep,char ** subtypep,struct param ** paramp)1215 EXPORTED void message_parse_type(const char *hdr, char **typep, char **subtypep, struct param **paramp)
1216 {
1217 const char *type;
1218 int typelen;
1219 const char *subtype;
1220 int subtypelen;
1221 char *decbuf = NULL;
1222
1223 /* Skip leading whitespace, ignore header if blank */
1224 message_parse_rfc822space(&hdr);
1225 if (!hdr) return;
1226
1227 /* Very old versions of macOS Mail.app encode the Content-Type header
1228 * in MIME words, if the attachment name contains non-ASCII characters */
1229 if (strlen(hdr) > 2 && hdr[0] == '=' && hdr[1] == '?') {
1230 int flags = CHARSET_KEEPCASE;
1231 decbuf = charset_decode_mimeheader(hdr, flags);
1232 if (strcmpsafe(decbuf, hdr)) hdr = decbuf;
1233 }
1234
1235 /* Find end of type token */
1236 type = hdr;
1237 for (; *hdr && !Uisspace(*hdr) && *hdr != '/' && *hdr != '('; hdr++) {
1238 if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) goto done;
1239 }
1240 typelen = hdr - type;
1241
1242 /* Skip whitespace after type */
1243 message_parse_rfc822space(&hdr);
1244 if (!hdr) goto done;
1245
1246 /* Ignore header if no '/' character */
1247 if (*hdr++ != '/') goto done;
1248
1249 /* Skip whitespace before subtype, ignore header if no subtype */
1250 message_parse_rfc822space(&hdr);
1251 if (!hdr) return;
1252
1253 /* Find end of subtype token */
1254 subtype = hdr;
1255 for (; *hdr && !Uisspace(*hdr) && *hdr != ';' && *hdr != '('; hdr++) {
1256 if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) goto done;
1257 }
1258 subtypelen = hdr - subtype;
1259
1260 /* Skip whitespace after subtype */
1261 message_parse_rfc822space(&hdr);
1262
1263 /* Ignore header if not at end of header or parameter delimiter */
1264 if (hdr && *hdr != ';') goto done;
1265
1266 /* Save content type & subtype */
1267 *typep = message_ucase(xstrndup(type, typelen));
1268 *subtypep = message_ucase(xstrndup(subtype, subtypelen));
1269
1270 /* Parse parameter list */
1271 if (hdr) {
1272 message_parse_params(hdr+1, paramp);
1273 message_fold_params(paramp);
1274 if (decbuf && paramp && *paramp) {
1275 /* The type header was erroneously encoded as a RFC 2407 encoded word
1276 * (rather than encoding its attributes), and the parameter values
1277 * might now contain non-ASCII characters. Let's reencode them. */
1278 struct param *param = *paramp;
1279 for (; param; param = param->next) {
1280 const char *attr = param->attribute;
1281 /* Skip extended parameters */
1282 size_t attrlen = strlen(attr);
1283 if (!attrlen || attr[attrlen-1] == '*') continue;
1284 /* Check if the parameter value has non-ASCII characters */
1285 int has_highbit = 0;
1286 const char *val = param->value;
1287 for (val = param->value; *val && !has_highbit; val++) {
1288 has_highbit = *val & 0x80;
1289 }
1290 if (!has_highbit) continue;
1291 /* Reencode the parameter value */
1292 char *encvalue = charset_encode_mimeheader(param->value, strlen(param->value), 0);
1293 if (encvalue) {
1294 free(param->value);
1295 param->value = encvalue;
1296 }
1297 }
1298 }
1299 }
1300
1301 done:
1302 free(decbuf);
1303 }
1304
message_parse_bodytype(const char * hdr,struct body * body)1305 static void message_parse_bodytype(const char *hdr, struct body *body)
1306 {
1307 /* If we saw this header already, discard the earlier value */
1308 if (body->type) {
1309 free(body->type);
1310 free(body->subtype);
1311 body->type = body->subtype = NULL;
1312 param_free(&body->params);
1313 }
1314
1315 message_parse_type(hdr, &body->type, &body->subtype, &body->params);
1316 }
1317
1318 /*
1319 * Parse a Content-Disposition from a header.
1320 */
message_parse_disposition(const char * hdr,char ** hdrp,struct param ** paramp)1321 EXPORTED void message_parse_disposition(const char *hdr, char **hdrp, struct param **paramp)
1322 {
1323 const char *disposition;
1324 int dispositionlen;
1325
1326 /* Skip leading whitespace, ignore header if blank */
1327 message_parse_rfc822space(&hdr);
1328 if (!hdr) return;
1329
1330 /* Find end of disposition token */
1331 disposition = hdr;
1332 for (; *hdr && !Uisspace(*hdr) && *hdr != ';' && *hdr != '('; hdr++) {
1333 if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) return;
1334 }
1335 dispositionlen = hdr - disposition;
1336
1337 /* Skip whitespace after type */
1338 message_parse_rfc822space(&hdr);
1339
1340 /* Ignore header if not at end of header or parameter delimiter */
1341 if (hdr && *hdr != ';') return;
1342
1343 /* Save content disposition */
1344 *hdrp = message_ucase(xstrndup(disposition, dispositionlen));
1345
1346 /* Parse parameter list */
1347 if (hdr) {
1348 message_parse_params(hdr+1, paramp);
1349 message_fold_params(paramp);
1350 }
1351 }
1352
1353 /*
1354 * Parse a Content-Disposition from a header.
1355 */
message_parse_bodydisposition(const char * hdr,struct body * body)1356 static void message_parse_bodydisposition(const char *hdr, struct body *body)
1357 {
1358 /* If we saw this header already, discard the earlier value */
1359 if (body->disposition) {
1360 free(body->disposition);
1361 body->disposition = NULL;
1362 param_free(&body->disposition_params);
1363 }
1364
1365 message_parse_disposition(hdr, &body->disposition, &body->disposition_params);
1366 }
1367
1368 /*
1369 * Parse a parameter list from a header.
1370 *
1371 * 'hdr' points into the message, and is not expected to
1372 * be nul-terminated. Handles continuation headers.
1373 *
1374 * Malformed parameters are handled by skipping to the
1375 * next ';' or end of line, which should mark the next
1376 * parameter.
1377 */
message_parse_params(const char * hdr,struct param ** paramp)1378 static void message_parse_params(const char *hdr, struct param **paramp)
1379 {
1380 struct param *param;
1381 const char *attribute;
1382 int attributelen;
1383 const char *value;
1384 int valuelen;
1385 char *p;
1386
1387 for (;;) {
1388 /* Skip over leading whitespace */
1389 message_parse_rfc822space(&hdr);
1390 if (!hdr) return;
1391
1392 /* Find end of attribute */
1393 attribute = hdr;
1394 for (; *hdr && !Uisspace(*hdr) && *hdr != '=' && *hdr != '('; hdr++) {
1395 if (*hdr < ' ' || strchr(MIME_TSPECIALS, *hdr)) goto skip;
1396 }
1397 attributelen = hdr - attribute;
1398
1399 /* Skip whitespace after attribute */
1400 message_parse_rfc822space(&hdr);
1401 if (!hdr) return;
1402
1403 /* Ignore param if no '=' character */
1404 if (*hdr++ != '=') goto skip;
1405
1406 /* Skip whitespace before value */
1407 message_parse_rfc822space(&hdr);
1408 if (!hdr) return;
1409
1410 /* Find end of value */
1411 value = hdr;
1412 if (*hdr == '\"') {
1413 /* Parse quoted-string */
1414 hdr++;
1415 while (*hdr && *hdr != '\"') {
1416 if (*hdr == '\\') {
1417 hdr++;
1418 if (!*hdr) return;
1419 }
1420 if (*hdr == '\r') {
1421 /* check for continuation headers */
1422 if (hdr[1] == '\n' && (hdr[2] == ' ' || hdr[2] == '\t')) hdr += 2;
1423 else return; /* end of header field */
1424 }
1425 hdr++;
1426 }
1427 if (!*hdr++) return;
1428 }
1429 else {
1430 /* Parse token (leniently allow space and tspecials) */
1431 const char *endval = hdr;
1432 while (*hdr && *hdr != ';' && *hdr != '(') {
1433 if (*hdr == '\r') {
1434 /* Skip FWS and stop at CRLF */
1435 if (hdr[1] == '\n' && (hdr[2] == ' ' || hdr[2] == '\t')) {
1436 hdr += 2;
1437 continue;
1438 }
1439 else break;
1440 }
1441 if (*hdr & 0x80) {
1442 /* Allow unencoded non-ASCII characters */
1443 /* XXX We should probably make sure this is valid UTF-8 */
1444 }
1445 else if (*hdr < ' ' && *hdr != '\t') {
1446 /* Reject control characters */
1447 goto skip;
1448 }
1449 if (*hdr != ' ' && *hdr != '\t') {
1450 /* Keep last non-WSP position */
1451 endval = hdr;
1452 }
1453 hdr++;
1454 }
1455 /* Right-strip white space */
1456 hdr = endval + 1;
1457 }
1458 valuelen = hdr - value;
1459
1460 /* Skip whitespace after value */
1461 message_parse_rfc822space(&hdr);
1462
1463 /* Ignore parameter if not at end of header or parameter delimiter */
1464 if (hdr && *hdr++ != ';') {
1465 skip:
1466 hdr += strcspn(hdr, ";\r\n");
1467 if (*hdr == ';') hdr++;
1468 continue;
1469 }
1470
1471 /* Save attribute/value pair */
1472 *paramp = param = (struct param *)xzmalloc(sizeof(struct param));
1473 param->attribute = message_ucase(xstrndup(attribute, attributelen));
1474 param->value = xzmalloc(valuelen + 1); /* xzmalloc for trailing NUL */
1475 if (*value == '\"') {
1476 p = param->value;
1477 value++;
1478 while (*value != '\"') {
1479 if (*value == '\\') value++;
1480 else if (*value == '\r') value += 2;
1481 *p++ = *value++;
1482 }
1483 *p = '\0';
1484 }
1485 else {
1486 memcpy(param->value, value, valuelen);
1487 }
1488
1489 /* Get ready to parse the next parameter */
1490 paramp = ¶m->next;
1491 }
1492 }
1493
1494 /*
1495 * Decode RFC 2231 parameter continuations
1496 *
1497 * Algorithm: Run down the list of parameters looking for
1498 * an attribute of the form "foo*0" or "foo*0*". When we find
1499 * such an attribute, we look for "foo*1"/"foo*1*", "foo*2"/"foo*2*"
1500 * etc, appending each value to that of "foo*0" and then removing the
1501 * parameter we just appended from the list. When appending values,
1502 * if either parameter has extended syntax, we have to convert the other
1503 * value from simple to extended syntax. At the end, we change the name
1504 * of "foo*0"/"foo*0*" to either "foo" or "foo*", depending on whether
1505 * the value has extended syntax or not.
1506 */
message_fold_params(struct param ** params)1507 static void message_fold_params(struct param **params)
1508 {
1509 struct param *thisparam; /* The "foo*1" param we're folding */
1510 struct param **continuation; /* Pointer to the "foo*2" param */
1511 struct param *tmpparam; /* Placeholder for removing "foo*2" */
1512 char *asterisk;
1513 int section;
1514 int is_extended;
1515 char sectionbuf[5];
1516 int attributelen, sectionbuflen;
1517 char *from, *to;
1518
1519 for (thisparam = *params; thisparam; thisparam = thisparam->next) {
1520 asterisk = strchr(thisparam->attribute, '*');
1521 if (asterisk && asterisk[1] == '0' &&
1522 (!asterisk[2] || (asterisk[2] == '*' && !asterisk[3]))) {
1523 /* An initial section. Find and collect the rest */
1524 is_extended = (asterisk[2] == '*');
1525 *asterisk = '\0';
1526 attributelen = asterisk - thisparam->attribute;
1527 section = 1;
1528 for (;;) {
1529 if (section == 100) break;
1530 sectionbuf[0] = '*';
1531 if (section > 9) {
1532 sectionbuf[1] = section/10 + '0';
1533 sectionbuf[2] = section%10 + '0';
1534 sectionbuf[3] = '\0';
1535 sectionbuflen = 3;
1536 }
1537 else {
1538 sectionbuf[1] = section + '0';
1539 sectionbuf[2] = '\0';
1540 sectionbuflen = 2;
1541 }
1542
1543 /* Find the next continuation */
1544 for (continuation = params; *continuation;
1545 continuation = &((*continuation)->next)) {
1546 if (!strncmp((*continuation)->attribute, thisparam->attribute,
1547 attributelen) &&
1548 !strncmp((*continuation)->attribute + attributelen,
1549 sectionbuf, sectionbuflen) &&
1550 ((*continuation)->attribute[attributelen+sectionbuflen] == '\0' ||
1551 ((*continuation)->attribute[attributelen+sectionbuflen] == '*' && (*continuation)->attribute[attributelen+sectionbuflen+1] == '\0'))) {
1552 break;
1553 }
1554 }
1555
1556 /* No more continuations to find */
1557 if (!*continuation) break;
1558
1559 if ((*continuation)->attribute[attributelen+sectionbuflen] == '\0') {
1560 /* Continuation is simple */
1561 if (is_extended) {
1562 /* Have to re-encode continuation value */
1563 thisparam->value =
1564 xrealloc(thisparam->value,
1565 strlen(thisparam->value) +
1566 3*strlen((*continuation)->value) + 1);
1567 from = (*continuation)->value;
1568 to = thisparam->value + strlen(thisparam->value);
1569 while (*from) {
1570 if (*from <= ' ' || *from >= 0x7f ||
1571 *from == '*' || *from == '\'' ||
1572 *from == '%' || strchr(MIME_TSPECIALS, *from)) {
1573 *to++ = '%';
1574 to += bin_to_hex(from, 1, to, BH_UPPER);
1575 } else {
1576 *to++ = *from;
1577 }
1578 from++;
1579 }
1580 *to++ = '\0';
1581 }
1582 else {
1583 thisparam->value =
1584 xrealloc(thisparam->value,
1585 strlen(thisparam->value) +
1586 strlen((*continuation)->value) + 1);
1587 from = (*continuation)->value;
1588 to = thisparam->value + strlen(thisparam->value);
1589 while ((*to++ = *from++)!= 0)
1590 { }
1591 }
1592 }
1593 else {
1594 /* Continuation is extended */
1595 if (is_extended) {
1596 thisparam->value =
1597 xrealloc(thisparam->value,
1598 strlen(thisparam->value) +
1599 strlen((*continuation)->value) + 1);
1600 from = (*continuation)->value;
1601 to = thisparam->value + strlen(thisparam->value);
1602 while ((*to++ = *from++) != 0)
1603 { }
1604 }
1605 else {
1606 /* Have to re-encode thisparam value */
1607 char *tmpvalue =
1608 xmalloc(2 + 3*strlen(thisparam->value) +
1609 strlen((*continuation)->value) + 1);
1610
1611 from = thisparam->value;
1612 to = tmpvalue;
1613 *to++ = '\''; /* Unspecified charset */
1614 *to++ = '\''; /* Unspecified language */
1615 while (*from) {
1616 if (*from <= ' ' || *from >= 0x7f ||
1617 *from == '*' || *from == '\'' ||
1618 *from == '%' || strchr(MIME_TSPECIALS, *from)) {
1619 *to++ = '%';
1620 to += bin_to_hex(from, 1, to, BH_UPPER);
1621 } else {
1622 *to++ = *from;
1623 }
1624 from++;
1625 }
1626 from = (*continuation)->value;
1627
1628 while ((*to++ = *from++)!=0)
1629 { }
1630
1631 free(thisparam->value);
1632 thisparam->value = tmpvalue;
1633 is_extended = 1;
1634 }
1635 }
1636
1637 /* Remove unneeded continuation */
1638 free((*continuation)->attribute);
1639 free((*continuation)->value);
1640 tmpparam = *continuation;
1641 *continuation = (*continuation)->next;
1642 free(tmpparam);
1643 section++;
1644 }
1645
1646 /* Fix up attribute name */
1647 if (is_extended) {
1648 asterisk[0] = '*';
1649 asterisk[1] = '\0';
1650 } else {
1651 asterisk[0] = '\0';
1652 }
1653 }
1654 }
1655 }
1656
1657
1658 /*
1659 * Parse a language list from a header
1660 */
message_parse_language(const char * hdr,struct param ** paramp)1661 static void message_parse_language(const char *hdr, struct param **paramp)
1662 {
1663 struct param *param;
1664 const char *value;
1665 int valuelen;
1666
1667 /* If we saw this header already, discard the earlier value */
1668 if (*paramp) param_free(paramp);
1669
1670 for (;;) {
1671 /* Skip over leading whitespace */
1672 message_parse_rfc822space(&hdr);
1673 if (!hdr) return;
1674
1675 /* Skip whitespace before value */
1676 message_parse_rfc822space(&hdr);
1677 if (!hdr) return;
1678
1679 /* Find end of value */
1680 value = hdr;
1681 for (; *hdr && !Uisspace(*hdr) && *hdr != ',' && *hdr != '('; hdr++) {
1682 if (*hdr != '-' && !Uisalpha((*hdr))) return;
1683 }
1684 valuelen = hdr - value;
1685
1686 /* Skip whitespace after value */
1687 message_parse_rfc822space(&hdr);
1688
1689 /* Ignore parameter if not at end of header or language delimiter */
1690 if (hdr && *hdr++ != ',') return;
1691
1692 /* Save value pair */
1693 *paramp = param = (struct param *)xzmalloc(sizeof(struct param));
1694 param->value = message_ucase(xstrndup(value, valuelen));
1695
1696 /* Get ready to parse the next parameter */
1697 paramp = ¶m->next;
1698 }
1699 }
1700
/*
 * Skip over RFC 822 whitespace and comments
 *
 * On entry '*s' points into a header value (or is NULL, in which case
 * nothing happens).  On return '*s' points at the first character that
 * is neither whitespace nor part of a parenthesised comment, or is NULL
 * if the end of the header field or a NUL byte was reached first.
 *
 * A newline followed by a space or tab is a folded continuation and is
 * skipped; any other newline ends the header field.  Comments nest, and
 * a backslash inside a comment escapes the following character.
 */
static void message_parse_rfc822space(const char **s)
{
    const char *p = *s;
    int commentlevel = 0;

    if (!p) return;
    while (*p && (Uisspace(*p) || *p == '(')) {
        if (*p == '\n') {
            p++;
            if (*p != ' ' && *p != '\t') {
                *s = NULL;      /* end of header field, no continuation */
                return;
            }
        }
        else if (*p == '(') {
            p++;
            commentlevel++;
            while (commentlevel) {
                switch (*p) {
                case '\n':
                    p++;
                    if (*p == ' ' || *p == '\t') break;
                    /* FALL THROUGH */
                case '\0':
                    *s = NULL;
                    return;

                case '\\':
                    p++;
                    if (!*p) {
                        /* Backslash was the last character: stop here
                         * instead of stepping past the terminating NUL */
                        *s = NULL;
                        return;
                    }
                    break;

                case '(':
                    commentlevel++;
                    break;

                case ')':
                    commentlevel--;
                    break;
                }
                p++;
            }
        }
        else p++;
    }
    if (*p == 0) {
        *s = NULL;      /* embedded NUL */
    }
    else {
        *s = p;
    }
}
1755
1756 /*
1757 * Parse the content of a MIME multipart body-part
1758 */
message_parse_multipart(struct msg * msg,struct body * body,strarray_t * boundaries,const char * efname)1759 static void message_parse_multipart(struct msg *msg, struct body *body,
1760 strarray_t *boundaries, const char *efname)
1761 {
1762 struct body preamble, epilogue;
1763 struct param *boundary;
1764 const char *defaultContentType = DEFAULT_CONTENT_TYPE;
1765 int i, depth;
1766 int limit = config_getint(IMAPOPT_BOUNDARY_LIMIT);
1767
1768 memset(&preamble, 0, sizeof(struct body));
1769 memset(&epilogue, 0, sizeof(struct body));
1770 if (strcmp(body->subtype, "DIGEST") == 0) {
1771 defaultContentType = "MESSAGE/RFC822";
1772 }
1773
1774 /* Find boundary id */
1775 boundary = body->params;
1776 while (boundary &&
1777 strcmp(boundary->attribute, "BOUNDARY") != 0 &&
1778 strcmp(boundary->attribute, "BOUNDARY*") != 0) {
1779 boundary = boundary->next;
1780 }
1781
1782 if (!boundary) {
1783 /* Invalid MIME--treat as zero-part multipart */
1784 message_parse_content(msg, body, boundaries, efname);
1785 return;
1786 }
1787
1788 /* Add the new boundary id */
1789 char *id = NULL;
1790 if (boundary->attribute[8] == '*') {
1791 /* Decode boundary id */
1792 id = charset_parse_mimexvalue(boundary->value, NULL);
1793 }
1794 if (!id) id = xstrdup(boundary->value);
1795 strarray_appendm(boundaries, id);
1796 depth = boundaries->count;
1797
1798 /* Parse preamble */
1799 message_parse_content(msg, &preamble, boundaries, efname);
1800
1801 /* Parse the component body-parts */
1802 while (boundaries->count == depth &&
1803 (limit == 0 ? 1 : boundaries->count < limit)) {
1804 body->subpart = (struct body *)xrealloc((char *)body->subpart,
1805 (body->numparts+1)*sizeof(struct body));
1806 message_parse_body(msg, &body->subpart[body->numparts],
1807 defaultContentType, boundaries, efname);
1808 if (msg->offset == msg->len &&
1809 body->subpart[body->numparts].boundary_size == 0) {
1810 /* hit the end of the message, therefore end all pending
1811 multiparts */
1812 strarray_truncate(boundaries, 0);
1813 }
1814 body->numparts++;
1815 }
1816
1817 if (boundaries->count == depth-1) {
1818 /* Parse epilogue */
1819 message_parse_content(msg, &epilogue, boundaries, efname);
1820 }
1821 else if (body->numparts) {
1822 /*
1823 * We hit the boundary of an enclosing multipart while parsing
1824 * a component body-part. Move the enclosing boundary information
1825 * up to our level.
1826 */
1827 body->boundary_size = body->subpart[body->numparts-1].boundary_size;
1828 body->boundary_lines = body->subpart[body->numparts-1].boundary_lines;
1829 body->subpart[body->numparts-1].boundary_size = 0;
1830 body->subpart[body->numparts-1].boundary_lines = 0;
1831 }
1832 else {
1833 /*
1834 * We hit the boundary of an enclosing multipart while parsing
1835 * the preamble. Move the enclosing boundary information
1836 * up to our level.
1837 */
1838 body->boundary_size = preamble.boundary_size;
1839 body->boundary_lines = preamble.boundary_lines;
1840 preamble.boundary_size = 0;
1841 preamble.boundary_lines = 0;
1842 }
1843
1844 /*
1845 * Calculate our size/lines information
1846 */
1847 body->content_size = preamble.content_size + preamble.boundary_size;
1848 body->content_lines = preamble.content_lines + preamble.boundary_lines;
1849 for (i=0; i< body->numparts; i++) {
1850 body->content_size += body->subpart[i].header_size +
1851 body->subpart[i].content_size +
1852 body->subpart[i].boundary_size;
1853 body->content_lines += body->subpart[i].header_lines +
1854 body->subpart[i].content_lines +
1855 body->subpart[i].boundary_lines;
1856 }
1857 body->content_size += epilogue.content_size;
1858 body->content_lines += epilogue.content_lines;
1859
1860 /*
1861 * Move any enclosing boundary information up to our level.
1862 */
1863 body->boundary_size += epilogue.boundary_size;
1864 body->boundary_lines += epilogue.boundary_lines;
1865
1866 /* check if we've hit a limit and flag it */
1867 if (limit && depth == limit) {
1868 if (efname)
1869 syslog(LOG_ERR, "ERROR: mime boundary limit %i exceeded, "
1870 "not parsing anymore (%s)",
1871 limit, efname);
1872 else
1873 syslog(LOG_ERR, "ERROR: mime boundary limit %i exceeded, "
1874 "not parsing anymore",
1875 limit);
1876 }
1877 }
1878
1879 /*
1880 * Parse the content of a generic body-part
1881 */
message_parse_content(struct msg * msg,struct body * body,strarray_t * boundaries,const char * efname)1882 static void message_parse_content(struct msg *msg, struct body *body,
1883 strarray_t *boundaries,
1884 const char *efname __attribute__((unused)))
1885 {
1886 const char *line, *endline;
1887 unsigned long s_offset = msg->offset;
1888 int encode;
1889 int len;
1890
1891 /* Should we encode a binary part? */
1892 encode = msg->encode &&
1893 body->encoding && !strcasecmp(body->encoding, "binary");
1894
1895 while (msg->offset < msg->len) {
1896 line = msg->base + msg->offset;
1897 endline = memchr(line, '\n', msg->len - msg->offset);
1898 if (endline) {
1899 endline++;
1900 }
1901 else {
1902 endline = msg->base + msg->len;
1903 }
1904 len = endline - line;
1905 msg->offset += len;
1906
1907 if (line[0] == '-' && line[1] == '-' &&
1908 message_pendingboundary(line, len, boundaries)) {
1909 body->boundary_size = len;
1910 body->boundary_lines++;
1911 if (body->content_lines) {
1912 body->content_lines--;
1913 body->boundary_lines++;
1914 }
1915 if (body->content_size > 1) {
1916 body->content_size -= 2;
1917 body->boundary_size += 2;
1918 }
1919 break;
1920 }
1921
1922 body->content_size += len;
1923
1924 /* Count the content lines, unless we're encoding
1925 (we always count blank lines) */
1926 if (endline[-1] == '\n' &&
1927 (!encode || line[0] == '\r')) {
1928 body->content_lines++;
1929 }
1930 }
1931
1932 if (encode) {
1933 size_t b64_size;
1934 int b64_lines, delta;
1935
1936 /* Determine encoded size */
1937 charset_encode_mimebody(NULL, body->content_size, NULL,
1938 &b64_size, NULL, 1 /* wrap */);
1939
1940 delta = b64_size - body->content_size;
1941
1942 /* Realloc buffer to accomodate encoding overhead */
1943 msg->base = xrealloc((char*) msg->base, msg->len + delta);
1944
1945 /* Shift content and remaining data by delta */
1946 memmove((char*) msg->base + s_offset + delta, msg->base + s_offset,
1947 msg->len - s_offset);
1948
1949 /* Encode content into buffer at current position */
1950 charset_encode_mimebody(msg->base + s_offset + delta,
1951 body->content_size,
1952 (char*) msg->base + s_offset,
1953 NULL, &b64_lines, 1 /* wrap */);
1954
1955 /* Adjust buffer position and length to account for encoding */
1956 msg->offset += delta;
1957 msg->len += delta;
1958
1959 /* Adjust body structure to account for encoding */
1960 free(body->encoding);
1961 body->encoding = xstrdup("BASE64");
1962 body->content_size = b64_size;
1963 body->content_lines += b64_lines;
1964 }
1965
1966 body_add_content_guid(msg->base + s_offset, body);
1967 }
1968
/*
 * Extract the date part of a Received: header.
 *
 * Does nothing if '*hdrp' is already set, so only the first Received
 * header seen is used.  Otherwise '*hdrp' receives a newly allocated
 * string: the text after the last ';' of the unfolded header, or the
 * whole unfolded header if it contains no ';'.
 */
static void message_parse_received_date(const char *hdr, char **hdrp)
{
    char *hdrbuf = NULL;
    char *semi;

    /* Ignore if we already saw one of these headers.
     * We want the date from the first Received header we see.
     */
    if (*hdrp) return;

    /* Copy header to temp buffer */
    message_parse_string(hdr, &hdrbuf);

    /* From rfc2822, 3.6.7
     *   received = "Received:" name-val-list ";" date-time CRLF
     * So look for the last ; and assume everything after is a date.
     * Failed parsing will return 0, and we'll use time() elsewhere
     * instead anyway.
     *
     * strrchr() is used rather than a manual backwards scan so that an
     * empty header does not form a pointer before the buffer, and a ';'
     * in the very first position is still recognised.
     */
    semi = strrchr(hdrbuf, ';');

    /* Didn't find ; - fill in hdrp so we don't look at next received header */
    if (!semi) {
        *hdrp = hdrbuf;
        return;
    }

    /* Found it, copy out date string part */
    message_parse_string(semi + 1, hdrp);
    free(hdrbuf);
}
2002
2003
2004 /*
2005 * Read a line from @msg into @buf. Returns a pointer to the start of
2006 * the line inside @buf, or NULL at the end of @msg.
2007 */
message_getline(struct buf * buf,struct msg * msg)2008 static char *message_getline(struct buf *buf, struct msg *msg)
2009 {
2010 unsigned int oldlen = buf_len(buf);
2011 int c;
2012
2013 while (msg->offset < msg->len) {
2014 c = msg->base[msg->offset++];
2015 buf_putc(buf, c);
2016 if (c == '\n')
2017 break;
2018 }
2019 buf_cstring(buf);
2020
2021 if (buf_len(buf) == oldlen)
2022 return 0;
2023 return buf->s + oldlen;
2024 }
2025
2026
2027 /*
2028 * Return nonzero if s is an enclosing boundary delimiter.
2029 * If we hit a terminating boundary, the integer pointed to by
2030 * 'boundaryct' is modified appropriately.
2031 */
message_pendingboundary(const char * s,int slen,strarray_t * boundaries)2032 static int message_pendingboundary(const char *s, int slen,
2033 strarray_t *boundaries)
2034 {
2035 int i, len;
2036 int rfc2046_strict = config_getswitch(IMAPOPT_RFC2046_STRICT);
2037 const char *bbase;
2038 int blen;
2039
2040 /* skip initial '--' */
2041 if (slen < 2) return 0;
2042 if (s[0] != '-' || s[1] != '-') return 0;
2043 bbase = s + 2;
2044 blen = slen - 2;
2045
2046 for (i = 0; i < boundaries->count ; ++i) {
2047 len = strlen(boundaries->data[i]);
2048 /* basic sanity check and overflow protection */
2049 if (blen < len) continue;
2050
2051 if (!strncmp(bbase, boundaries->data[i], len)) {
2052 /* trailing '--', it's the end of this part */
2053 if (blen >= len+2 && bbase[len] == '-' && bbase[len+1] == '-')
2054 strarray_truncate(boundaries, i);
2055 else if (!rfc2046_strict && blen > len+1 &&
2056 bbase[len] && !Uisspace(bbase[len])) {
2057 /* Allow substring matches in the boundary.
2058 *
2059 * If rfc2046_strict is enabled, boundaries containing
2060 * other boundaries as substrings will be treated as identical
2061 * (per RFC 2046 section 5.1.1). Note that this will
2062 * break some messages created by Eudora 5.1 (and earlier).
2063 */
2064 continue;
2065 }
2066 return 1;
2067 }
2068 }
2069 return 0;
2070 }
2071
2072
2073 /*
2074 * Write the cache information for the message parsed to 'body'
2075 * to 'outfile'.
2076 */
message_write_cache(struct index_record * record,const struct body * body)2077 EXPORTED int message_write_cache(struct index_record *record, const struct body *body)
2078 {
2079 static struct buf cacheitem_buffer;
2080 struct buf ib[NUM_CACHE_FIELDS];
2081 struct body toplevel;
2082 char *subject;
2083 int i;
2084
2085 /* initialise data structures */
2086 buf_reset(&cacheitem_buffer);
2087 memset(ib, 0, sizeof(ib));
2088
2089 toplevel.type = "MESSAGE";
2090 toplevel.subtype = "RFC822";
2091 /* we cast away const because we know that we're only using
2092 * toplevel.subpart as const in message_write_section(). */
2093 toplevel.subpart = (struct body *)body;
2094
2095 subject = charset_parse_mimeheader(body->subject, charset_flags);
2096
2097 /* copy into bufs */
2098 message_write_envelope(&ib[CACHE_ENVELOPE], body);
2099 message_write_body(&ib[CACHE_BODYSTRUCTURE], body, 1);
2100 buf_copy(&ib[CACHE_HEADERS], &body->cacheheaders);
2101 message_write_body(&ib[CACHE_BODY], body, 0);
2102 message_write_section(&ib[CACHE_SECTION], &toplevel);
2103 message_write_searchaddr(&ib[CACHE_FROM], body->from);
2104 message_write_searchaddr(&ib[CACHE_TO], body->to);
2105 message_write_searchaddr(&ib[CACHE_CC], body->cc);
2106 message_write_searchaddr(&ib[CACHE_BCC], body->bcc);
2107 message_write_nstring(&ib[CACHE_SUBJECT], subject);
2108
2109 free(subject);
2110
2111 /* append the records to the buffer */
2112 for (i = 0; i < NUM_CACHE_FIELDS; i++) {
2113 record->crec.item[i].len = buf_len(&ib[i]);
2114 record->crec.item[i].offset = buf_len(&cacheitem_buffer) + sizeof(uint32_t);
2115 message_write_xdrstring(&cacheitem_buffer, &ib[i]);
2116 buf_free(&ib[i]);
2117 }
2118
2119 /* copy the fields into the message */
2120 record->cache_offset = 0; /* calculate on write! */
2121 record->cache_version = MAILBOX_CACHE_MINOR_VERSION;
2122 record->cache_crc = crc32_buf(&cacheitem_buffer);
2123 record->crec.buf = &cacheitem_buffer;
2124 record->crec.offset = 0; /* we're at the start of the buffer */
2125 record->crec.len = buf_len(&cacheitem_buffer);
2126
2127 return 0;
2128 }
2129
2130
2131 /*
2132 * Write the IMAP envelope for 'body' to 'buf'
2133 */
message_write_envelope(struct buf * buf,const struct body * body)2134 static void message_write_envelope(struct buf *buf, const struct body *body)
2135 {
2136 buf_putc(buf, '(');
2137 message_write_nstring(buf, body->date);
2138 buf_putc(buf, ' ');
2139 message_write_nstring(buf, body->subject);
2140 buf_putc(buf, ' ');
2141 message_write_address(buf, body->from);
2142 buf_putc(buf, ' ');
2143 message_write_address(buf, body->sender ? body->sender : body->from);
2144 buf_putc(buf, ' ');
2145 message_write_address(buf, body->reply_to ? body->reply_to : body->from);
2146 buf_putc(buf, ' ');
2147 message_write_address(buf, body->to);
2148 buf_putc(buf, ' ');
2149 message_write_address(buf, body->cc);
2150 buf_putc(buf, ' ');
2151 message_write_address(buf, body->bcc);
2152 buf_putc(buf, ' ');
2153 message_write_nstring(buf, body->in_reply_to);
2154 buf_putc(buf, ' ');
2155 message_write_nstring(buf, body->message_id);
2156 buf_putc(buf, ')');
2157 }
2158
2159 /*
2160 * Write the BODY (if 'newformat' is zero) or BODYSTRUCTURE
2161 * (if 'newformat' is nonzero) for 'body' to 'buf'.
2162 */
message_write_body(struct buf * buf,const struct body * body,int newformat)2163 EXPORTED void message_write_body(struct buf *buf, const struct body *body,
2164 int newformat)
2165 {
2166 struct param *param;
2167
2168 if (strcmp(body->type, "MULTIPART") == 0) {
2169 int i;
2170
2171 /* 0-part multiparts are illegal--convert to 0-len text parts */
2172 if (body->numparts == 0) {
2173 static struct body zerotextbody;
2174
2175 if (!zerotextbody.type) {
2176 message_parse_bodytype(DEFAULT_CONTENT_TYPE, &zerotextbody);
2177 }
2178 message_write_body(buf, &zerotextbody, newformat);
2179 return;
2180 }
2181
2182 /* Multipart types get a body_multipart */
2183 buf_putc(buf, '(');
2184 for (i = 0; i < body->numparts; i++) {
2185 message_write_body(buf, &body->subpart[i], newformat);
2186 }
2187 buf_putc(buf, ' ');
2188 message_write_nstring(buf, body->subtype);
2189
2190 if (newformat) {
2191 buf_putc(buf, ' ');
2192 if ((param = body->params)!=NULL) {
2193 buf_putc(buf, '(');
2194 while (param) {
2195 message_write_nstring(buf, param->attribute);
2196 buf_putc(buf, ' ');
2197 message_write_nstring(buf, param->value);
2198 if ((param = param->next)!=NULL) {
2199 buf_putc(buf, ' ');
2200 }
2201 }
2202 buf_putc(buf, ')');
2203 }
2204 else message_write_nstring(buf, (char *)0);
2205 buf_putc(buf, ' ');
2206 if (body->disposition) {
2207 buf_putc(buf, '(');
2208 message_write_nstring(buf, body->disposition);
2209 buf_putc(buf, ' ');
2210 if ((param = body->disposition_params)!=NULL) {
2211 buf_putc(buf, '(');
2212 while (param) {
2213 message_write_nstring(buf, param->attribute);
2214 buf_putc(buf, ' ');
2215 message_write_nstring(buf, param->value);
2216 if ((param = param->next)!=NULL) {
2217 buf_putc(buf, ' ');
2218 }
2219 }
2220 buf_putc(buf, ')');
2221 }
2222 else message_write_nstring(buf, (char *)0);
2223 buf_putc(buf, ')');
2224 }
2225 else {
2226 message_write_nstring(buf, (char *)0);
2227 }
2228 buf_putc(buf, ' ');
2229 if ((param = body->language)!=NULL) {
2230 buf_putc(buf, '(');
2231 while (param) {
2232 message_write_nstring(buf, param->value);
2233 if ((param = param->next)!=NULL) {
2234 buf_putc(buf, ' ');
2235 }
2236 }
2237 buf_putc(buf, ')');
2238 }
2239 else message_write_nstring(buf, (char *)0);
2240 buf_putc(buf, ' ');
2241 message_write_nstring(buf, body->location);
2242 }
2243
2244 buf_putc(buf, ')');
2245 return;
2246 }
2247
2248 buf_putc(buf, '(');
2249 message_write_nstring(buf, body->type);
2250 buf_putc(buf, ' ');
2251 message_write_nstring(buf, body->subtype);
2252 buf_putc(buf, ' ');
2253
2254 if ((param = body->params)!=NULL) {
2255 buf_putc(buf, '(');
2256 while (param) {
2257 message_write_nstring(buf, param->attribute);
2258 buf_putc(buf, ' ');
2259 message_write_nstring(buf, param->value);
2260 if ((param = param->next)!=NULL) {
2261 buf_putc(buf, ' ');
2262 }
2263 }
2264 buf_putc(buf, ')');
2265 }
2266 else message_write_nstring(buf, (char *)0);
2267 buf_putc(buf, ' ');
2268
2269 message_write_nstring(buf, body->id);
2270 buf_putc(buf, ' ');
2271 message_write_nstring(buf, body->description);
2272 buf_putc(buf, ' ');
2273 message_write_nstring(buf, body->encoding ? body->encoding : "7BIT");
2274 buf_putc(buf, ' ');
2275 buf_printf(buf, "%u", body->content_size);
2276
2277 if (strcmp(body->type, "TEXT") == 0) {
2278 /* Text types get a line count */
2279 buf_putc(buf, ' ');
2280 buf_printf(buf, "%u", body->content_lines);
2281 }
2282 else if (strcmp(body->type, "MESSAGE") == 0
2283 && strcmp(body->subtype, "RFC822") == 0) {
2284 /* Message/rfc822 gets a body_msg */
2285 buf_putc(buf, ' ');
2286 message_write_envelope(buf, body->subpart);
2287 buf_putc(buf, ' ');
2288 message_write_body(buf, body->subpart, newformat);
2289 buf_putc(buf, ' ');
2290 buf_printf(buf, "%u", body->content_lines);
2291 }
2292
2293 if (newformat) {
2294 /* Add additional fields for BODYSTRUCTURE */
2295 buf_putc(buf, ' ');
2296 message_write_nstring(buf, body->md5);
2297 buf_putc(buf, ' ');
2298 if (body->disposition) {
2299 buf_putc(buf, '(');
2300 message_write_nstring(buf, body->disposition);
2301 buf_putc(buf, ' ');
2302 if ((param = body->disposition_params)!=NULL) {
2303 buf_putc(buf, '(');
2304 while (param) {
2305 message_write_nstring(buf, param->attribute);
2306 buf_putc(buf, ' ');
2307 message_write_nstring(buf, param->value);
2308 if ((param = param->next)!=NULL) {
2309 buf_putc(buf, ' ');
2310 }
2311 }
2312 buf_putc(buf, ')');
2313 }
2314 else message_write_nstring(buf, (char *)0);
2315 buf_putc(buf, ')');
2316 }
2317 else {
2318 message_write_nstring(buf, (char *)0);
2319 }
2320 buf_putc(buf, ' ');
2321 if ((param = body->language)!=NULL) {
2322 buf_putc(buf, '(');
2323 while (param) {
2324 message_write_nstring(buf, param->value);
2325 if ((param = param->next)!=NULL) {
2326 buf_putc(buf, ' ');
2327 }
2328 }
2329 buf_putc(buf, ')');
2330 }
2331 else message_write_nstring(buf, (char *)0);
2332 buf_putc(buf, ' ');
2333 message_write_nstring(buf, body->location);
2334
2335 if (newformat > 1 && !body->numparts) {
2336 /* even newer extension fields for annotation callout */
2337 buf_printf(buf, " (OFFSET %u HEADERSIZE %u)",
2338 body->content_offset,
2339 body->header_size);
2340 }
2341 }
2342
2343 buf_putc(buf, ')');
2344 }
2345
2346 /*
2347 * Write the address list 'addrlist' to 'buf'
2348 */
message_write_address(struct buf * buf,const struct address * addrlist)2349 static void message_write_address(struct buf *buf,
2350 const struct address *addrlist)
2351 {
2352 /* If no addresses, write out NIL */
2353 if (!addrlist) {
2354 message_write_nstring(buf, (char *)0);
2355 return;
2356 }
2357
2358 buf_putc(buf, '(');
2359
2360 while (addrlist) {
2361 buf_putc(buf, '(');
2362 message_write_nstring(buf, addrlist->name);
2363 buf_putc(buf, ' ');
2364 message_write_nstring(buf, addrlist->route);
2365 buf_putc(buf, ' ');
2366 message_write_nstring(buf, addrlist->mailbox);
2367 buf_putc(buf, ' ');
2368 message_write_nstring(buf, addrlist->domain);
2369 buf_putc(buf, ')');
2370 addrlist = addrlist->next;
2371 }
2372
2373 buf_putc(buf, ')');
2374 }
2375
2376 /*
2377 * Write the nil-or-string 's' to 'buf'
2378 */
message_write_nstring(struct buf * buf,const char * s)2379 EXPORTED void message_write_nstring(struct buf *buf, const char *s)
2380 {
2381 message_write_nstring_map(buf, s, (s ? strlen(s) : 0));
2382 }
2383
message_write_nstring_map(struct buf * buf,const char * s,unsigned int len)2384 EXPORTED void message_write_nstring_map(struct buf *buf,
2385 const char *s,
2386 unsigned int len)
2387 {
2388 const char *p;
2389 int is_literal = 0;
2390
2391 /* Write null pointer as NIL */
2392 if (!s) {
2393 buf_appendcstr(buf, "NIL");
2394 return;
2395 }
2396
2397 if (len >= 1024)
2398 {
2399 is_literal = 1;
2400 }
2401 else
2402 {
2403 /* Look for any non-QCHAR characters */
2404 for (p = s; (unsigned)(p-s) < len ; p++) {
2405 if (!*p || *p & 0x80 || *p == '\r' || *p == '\n'
2406 || *p == '\"' || *p == '%' || *p == '\\') {
2407 is_literal = 1;
2408 break;
2409 }
2410 }
2411 }
2412
2413 if (is_literal) {
2414 /* Write out as literal */
2415 buf_printf(buf, "{%u}\r\n", len);
2416 buf_appendmap(buf, s, len);
2417 }
2418 else {
2419 /* Write out as quoted string */
2420 buf_putc(buf, '"');
2421 buf_appendmap(buf, s, len);
2422 buf_putc(buf, '"');
2423 }
2424 }
2425
2426 /*
2427 * Append the string @s to the buffer @buf in a binary
2428 * format almost exactly
2429 */
message_write_xdrstring(struct buf * buf,const struct buf * s)2430 EXPORTED void message_write_xdrstring(struct buf *buf, const struct buf *s)
2431 {
2432 unsigned padlen;
2433
2434 /* 32b string length in network order */
2435 buf_appendbit32(buf, buf_len(s));
2436 /* bytes of string */
2437 buf_appendmap(buf, s->s, s->len);
2438 /* 0 to 3 bytes padding */
2439 padlen = (4 - (s->len & 3)) & 3;
2440 buf_appendmap(buf, "\0\0\0", padlen);
2441 }
2442
/*
 * Append the NUL-terminated text 's' to 'buf', passing every character
 * through TOLOWER() on the way.  's' must not be NULL.
 */
static void message_write_text_lcase(struct buf *buf, const char *s)
{
    while (*s) {
        buf_putc(buf, TOLOWER(*s));
        s++;
    }
}
2452
message_write_nocharset(struct buf * buf,const struct body * body)2453 static void message_write_nocharset(struct buf *buf, const struct body *body)
2454 {
2455 buf_appendbit32(buf, 0x0000ffff);
2456
2457 char guidbuf[MESSAGE_GUID_SIZE];
2458 if (body) message_guid_export(&body->content_guid, guidbuf);
2459 else memset(&guidbuf, 0, MESSAGE_GUID_SIZE);
2460 buf_appendmap(buf, guidbuf, MESSAGE_GUID_SIZE);
2461 buf_appendbit32(buf, body ? body->decoded_content_size : 0);
2462 buf_appendbit32(buf, body ? body->content_lines : 0);
2463 }
2464
2465 /*
2466 * Write out the FETCH BODY[section] location/size information to 'buf'.
2467 */
message_write_section(struct buf * buf,const struct body * body)2468 static void message_write_section(struct buf *buf, const struct body *body)
2469 {
2470 int part;
2471
2472 if (strcmp(body->type, "MESSAGE") == 0
2473 && strcmp(body->subtype, "RFC822") == 0) {
2474 if (body->subpart->numparts) {
2475 /*
2476 * Part 0 of a message/rfc822 is the message header/text.
2477 * Nested parts of a message/rfc822 containing a multipart
2478 * are the sub-parts of the multipart.
2479 */
2480 buf_appendbit32(buf, body->subpart->numparts+1);
2481 buf_appendbit32(buf, body->subpart->header_offset);
2482 buf_appendbit32(buf, body->subpart->header_size);
2483 buf_appendbit32(buf, body->subpart->content_offset);
2484 buf_appendbit32(buf, body->subpart->content_size);
2485 message_write_nocharset(buf, body->subpart);
2486 for (part = 0; part < body->subpart->numparts; part++) {
2487 buf_appendbit32(buf, body->subpart->subpart[part].header_offset);
2488 buf_appendbit32(buf, body->subpart->subpart[part].header_size);
2489 buf_appendbit32(buf, body->subpart->subpart[part].content_offset);
2490 if (body->subpart->subpart[part].numparts == 0 &&
2491 strcmp(body->subpart->subpart[part].type, "MULTIPART") == 0) {
2492 /* Treat 0-part multipart as 0-length text */
2493 buf_appendbit32(buf, 0);
2494 }
2495 else {
2496 buf_appendbit32(buf, body->subpart->subpart[part].content_size);
2497 }
2498 message_write_charset(buf, &body->subpart->subpart[part]);
2499 }
2500 for (part = 0; part < body->subpart->numparts; part++) {
2501 message_write_section(buf, &body->subpart->subpart[part]);
2502 }
2503 }
2504 else {
2505 /*
2506 * Part 0 of a message/rfc822 is the message header/text.
2507 * Part 1 of a message/rfc822 containing a non-multipart
2508 * is the message body.
2509 */
2510 buf_appendbit32(buf, 2);
2511 buf_appendbit32(buf, body->subpart->header_offset);
2512 buf_appendbit32(buf, body->subpart->header_size);
2513 buf_appendbit32(buf, body->subpart->content_offset);
2514 buf_appendbit32(buf, body->subpart->content_size);
2515 message_write_nocharset(buf, body->subpart);
2516 buf_appendbit32(buf, body->subpart->header_offset);
2517 buf_appendbit32(buf, body->subpart->header_size);
2518 buf_appendbit32(buf, body->subpart->content_offset);
2519 if (strcmp(body->subpart->type, "MULTIPART") == 0) {
2520 /* Treat 0-part multipart as 0-length text */
2521 buf_appendbit32(buf, 0);
2522 message_write_nocharset(buf, NULL);
2523 }
2524 else {
2525 buf_appendbit32(buf, body->subpart->content_size);
2526 message_write_charset(buf, body->subpart);
2527 }
2528 message_write_section(buf, body->subpart);
2529 }
2530 }
2531 else if (body->numparts) {
2532 /*
2533 * Cannot fetch part 0 of a multipart.
2534 * Nested parts of a multipart are the sub-parts.
2535 */
2536 buf_appendbit32(buf, body->numparts+1);
2537 buf_appendbit32(buf, 0);
2538 buf_appendbit32(buf, -1);
2539 buf_appendbit32(buf, 0);
2540 buf_appendbit32(buf, -1);
2541 message_write_nocharset(buf, NULL);
2542 for (part = 0; part < body->numparts; part++) {
2543 buf_appendbit32(buf, body->subpart[part].header_offset);
2544 buf_appendbit32(buf, body->subpart[part].header_size);
2545 buf_appendbit32(buf, body->subpart[part].content_offset);
2546 if (body->subpart[part].numparts == 0 &&
2547 strcmp(body->subpart[part].type, "MULTIPART") == 0) {
2548 /* Treat 0-part multipart as 0-length text */
2549 buf_appendbit32(buf, 0);
2550 message_write_nocharset(buf, &body->subpart[part]);
2551 }
2552 else {
2553 buf_appendbit32(buf, body->subpart[part].content_size);
2554 message_write_charset(buf, &body->subpart[part]);
2555 }
2556 }
2557 for (part = 0; part < body->numparts; part++) {
2558 message_write_section(buf, &body->subpart[part]);
2559 }
2560 }
2561 else {
2562 /*
2563 * Leaf section--no part 0 or nested parts
2564 */
2565 buf_appendbit32(buf, 0);
2566 }
2567 }
2568
2569 /*
2570 * Write the 32-bit charset/encoding value and the charset identifier
2571 * for section 'body' to 'buf'
2572 */
message_write_charset(struct buf * buf,const struct body * body)2573 static void message_write_charset(struct buf *buf, const struct body *body)
2574 {
2575 int encoding;
2576 charset_t charset;
2577 size_t len = 0;
2578 const char *name = NULL;
2579
2580 message_parse_charset(body, &encoding, &charset);
2581
2582 /* write charset/encoding preamble */
2583 if (charset != CHARSET_UNKNOWN_CHARSET) {
2584 size_t itemsize;
2585
2586 name = charset_alias_name(charset);
2587 len = strlen(name);
2588
2589 /* charset name length is a multiple of cache item size,
2590 * including the terminating zero byte(s) */
2591 itemsize = (size_t) CACHE_ITEM_SIZE_SKIP;
2592 len = ((len / itemsize) + 1) * itemsize;
2593 if (len > 0xffff) len = 0;
2594 }
2595 /* we stored 0x100 here to say that it was a version 4 cache with the
2596 * charset length stored, which is all very well and nice, but it's
2597 * useless once we added sha1, so it's been removed again */
2598 buf_appendbit32(buf, ((len & 0xffff) << 16)|(encoding & 0xff));
2599
2600 /* write charset identifier */
2601 if (len) {
2602 char *tmp = (char*) xcalloc(sizeof(char), len);
2603 memcpy(tmp, name, strlen(name));
2604 buf_appendmap(buf, tmp, len);
2605 free(tmp);
2606 }
2607 charset_free(&charset);
2608
2609 /* NOTE - this stuff doesn't really belong in a method called
2610 * message_write_charset, but it's the fields that are always
2611 * written immediately after the charset! */
2612 char guidbuf[MESSAGE_GUID_SIZE];
2613 if (body) message_guid_export(&body->content_guid, guidbuf);
2614 else memset(&guidbuf, 0, MESSAGE_GUID_SIZE);
2615 buf_appendmap(buf, guidbuf, MESSAGE_GUID_SIZE);
2616 buf_appendbit32(buf, body ? body->decoded_content_size : 0);
2617 buf_appendbit32(buf, body ? body->content_lines : 0);
2618 }
2619
2620 /*
2621 * Unparse the address list 'addrlist' to 'buf'
2622 */
message_write_searchaddr(struct buf * buf,const struct address * addrlist)2623 static void message_write_searchaddr(struct buf *buf,
2624 const struct address *addrlist)
2625 {
2626 int prevaddr = 0;
2627 char* tmp;
2628
2629 while (addrlist) {
2630
2631 /* Handle RFC 822 group addresses */
2632 if (!addrlist->domain) {
2633 if (addrlist->mailbox) {
2634 if (prevaddr) buf_putc(buf, ',');
2635
2636 tmp = charset_parse_mimeheader(addrlist->mailbox, charset_flags);
2637 buf_appendcstr(buf, tmp);
2638 free(tmp);
2639 tmp = NULL;
2640 buf_putc(buf, ':');
2641
2642 /* Suppress a trailing comma */
2643 prevaddr = 0;
2644 }
2645 else {
2646 buf_putc(buf, ';');
2647 prevaddr = 1;
2648 }
2649 }
2650 else {
2651 if (prevaddr) buf_putc(buf, ',');
2652
2653 if (addrlist->name) {
2654 tmp = charset_parse_mimeheader(addrlist->name, charset_flags);
2655 buf_appendcstr(buf, tmp);
2656 free(tmp); tmp = NULL;
2657 buf_putc(buf, ' ');
2658 }
2659
2660 buf_putc(buf, '<');
2661 if (addrlist->route) {
2662 message_write_text_lcase(buf, addrlist->route);
2663 buf_putc(buf, ':');
2664 }
2665
2666 message_write_text_lcase(buf, addrlist->mailbox);
2667 buf_putc(buf, '@');
2668
2669 message_write_text_lcase(buf, addrlist->domain);
2670 buf_putc(buf, '>');
2671 prevaddr = 1;
2672 }
2673
2674 addrlist = addrlist->next;
2675 }
2676 }
2677
param_free(struct param ** paramp)2678 EXPORTED void param_free(struct param **paramp)
2679 {
2680 struct param *param, *nextparam;
2681
2682 param = *paramp;
2683 *paramp = NULL;
2684
2685 for (; param; param = nextparam) {
2686 nextparam = param->next;
2687 if (param->attribute) free(param->attribute);
2688 if (param->value) free(param->value);
2689 free(param);
2690 }
2691 }
2692
2693 /*
2694 * Free the parsed body-part 'body'
2695 */
message_free_body(struct body * body)2696 EXPORTED void message_free_body(struct body *body)
2697 {
2698 int part;
2699
2700 if (!body) return;
2701
2702 if (body->type) {
2703 free(body->type);
2704 free(body->subtype);
2705 param_free(&body->params);
2706 }
2707 if (body->id) free(body->id);
2708 if (body->description) free(body->description);
2709 if (body->encoding) free(body->encoding);
2710 if (body->md5) free(body->md5);
2711 if (body->disposition) {
2712 free(body->disposition);
2713 param_free(&body->disposition_params);
2714 }
2715 param_free(&body->language);
2716 if (body->location) free(body->location);
2717 if (body->date) free(body->date);
2718 if (body->subject) free(body->subject);
2719 if (body->from) parseaddr_free(body->from);
2720 if (body->sender) parseaddr_free(body->sender);
2721 if (body->reply_to) parseaddr_free(body->reply_to);
2722 if (body->to) parseaddr_free(body->to);
2723 if (body->cc) parseaddr_free(body->cc);
2724 if (body->bcc) parseaddr_free(body->bcc);
2725 if (body->in_reply_to) free(body->in_reply_to);
2726 if (body->message_id) free(body->message_id);
2727 if (body->x_me_message_id) free(body->x_me_message_id);
2728 if (body->references) free(body->references);
2729 if (body->received_date) free(body->received_date);
2730 if (body->x_deliveredinternaldate) free(body->x_deliveredinternaldate);
2731 if (body->charset_id) free(body->charset_id);
2732 if (body->part_id) free(body->part_id);
2733
2734 if (body->subpart) {
2735 if (body->numparts) {
2736 for (part=0; part < body->numparts; part++) {
2737 message_free_body(&body->subpart[part]);
2738 }
2739 }
2740 else {
2741 message_free_body(body->subpart);
2742 }
2743 free(body->subpart);
2744 }
2745
2746 buf_free(&body->cacheheaders);
2747
2748 if (body->decoded_body) free(body->decoded_body);
2749 }
2750
2751 /*
2752 * Parse a cached envelope into individual tokens
2753 *
2754 * When inside a list (ncom > 0), we parse the individual tokens but don't
2755 * isolate them -- we return the entire list as a single token.
2756 */
parse_cached_envelope(char * env,char * tokens[],int tokens_size)2757 HIDDEN void parse_cached_envelope(char *env, char *tokens[], int tokens_size)
2758 {
2759 char *c;
2760 int i = 0, ncom = 0, len;
2761
2762 /*
2763 * We have no way of indicating that we parsed less than
2764 * the requested number of tokens, but we can at least
2765 * ensure that the array is correctly initialised to NULL.
2766 */
2767 memset(tokens, 0, tokens_size*sizeof(char*));
2768
2769 c = env;
2770 while (*c != '\0') {
2771 switch (*c) {
2772 case ' ': /* end of token */
2773 if (!ncom) *c = '\0'; /* mark end of token */
2774 c++;
2775 break;
2776 case 'N': /* "NIL" */
2777 case 'n':
2778 if (!ncom) {
2779 if(i>=tokens_size) break;
2780 tokens[i++] = NULL; /* empty token */
2781 }
2782 c += 3; /* skip "NIL" */
2783 break;
2784 case '"': /* quoted string */
2785 c++; /* skip open quote */
2786 if (!ncom) {
2787 if(i>=tokens_size) break;
2788 tokens[i++] = c; /* start of string */
2789 }
2790 while (*c && *c != '"') { /* find close quote */
2791 if (*c == '\\') c++; /* skip quoted-specials */
2792 if (*c) c++;
2793 }
2794 if (*c) {
2795 if (!ncom) *c = '\0'; /* end of string */
2796 c++; /* skip close quote */
2797 }
2798 break;
2799 case '{': /* literal */
2800 c++; /* skip open brace */
2801 len = 0; /* determine length of literal */
2802 while (cyrus_isdigit((int) *c)) {
2803 len = len*10 + *c - '0';
2804 c++;
2805 }
2806 c += 3; /* skip close brace & CRLF */
2807 if (!ncom){
2808 if(i>=tokens_size) break;
2809 tokens[i++] = c; /* start of literal */
2810 }
2811 c += len; /* skip literal */
2812 break;
2813 case '(': /* start of address */
2814 c++; /* skip open paren */
2815 if (!ncom) {
2816 if(i>=tokens_size) break;
2817 tokens[i++] = c; /* start of address list */
2818 }
2819 ncom++; /* new open - inc counter */
2820 break;
2821 case ')': /* end of address */
2822 c++; /* skip close paren */
2823 if (ncom) { /* paranoia */
2824 ncom--; /* close - dec counter */
2825 if (!ncom) /* all open paren are closed */
2826 *(c-1) = '\0'; /* end of list - trim close paren */
2827 }
2828 break;
2829 default:
2830 /* yikes! unparsed junk, just skip it */
2831 c++;
2832 break;
2833 }
2834 }
2835 }
2836
parse_nstring(char ** str)2837 EXPORTED char *parse_nstring(char **str)
2838 {
2839 char *cp = *str, *val;
2840
2841 if (*cp == '"') { /* quoted string */
2842 val = cp+1; /* skip " */
2843 do {
2844 cp = strchr(cp+1, '"');
2845 if (!cp) return NULL; /* whole thing is broken */
2846 } while (*(cp-1) == '\\'); /* skip escaped " */
2847 *cp++ = '\0';
2848 }
2849 else if (*cp == '{') {
2850 int len = 0;
2851 /* yeah, it may be a literal too */
2852 cp++;
2853 while (cyrus_isdigit((int) *cp)) {
2854 len = len*10 + *cp - '0';
2855 cp++;
2856 }
2857 cp += 3; /* skip close brace & CRLF */
2858 val = cp;
2859 val[len] = '\0';
2860 cp += len;
2861 }
2862 else { /* NIL */
2863 val = NULL;
2864 cp += 3;
2865 }
2866
2867 *str = cp;
2868 return val;
2869 }
2870
message_parse_env_address(char * str,struct address * addr)2871 EXPORTED void message_parse_env_address(char *str, struct address *addr)
2872 {
2873 if (*str == '(') str++; /* skip ( */
2874 addr->name = parse_nstring(&str);
2875 str++; /* skip SP */
2876 addr->route = parse_nstring(&str);
2877 str++; /* skip SP */
2878 addr->mailbox = parse_nstring(&str);
2879 str++; /* skip SP */
2880 addr->domain = parse_nstring(&str);
2881 }
2882
2883 /*
2884 * Read an nstring from cached bodystructure.
2885 * Analog to message_write_nstring().
2886 * If 'copy' is set, returns a freshly allocated copy of the string,
2887 * otherwise is returns a pointer to the string which will be overwritten
2888 * on the next call to message_read_nstring()
2889 */
message_read_nstring(struct protstream * strm,char ** str,int copy)2890 static int message_read_nstring(struct protstream *strm, char **str, int copy)
2891 {
2892 static struct buf buf = BUF_INITIALIZER;
2893 int c;
2894
2895 c = getnstring(strm, NULL, &buf);
2896
2897 if (str) {
2898 if (!buf.s) *str = NULL;
2899 else if (copy) *str = xstrdup(buf.s);
2900 else *str = buf.s;
2901 }
2902
2903 return c;
2904 }
2905
2906 /*
2907 * Read a parameter list from cached bodystructure.
2908 * If withattr is set, attribute/value pairs will be read,
2909 * otherwise, just values are read.
2910 */
message_read_params(struct protstream * strm,struct param ** paramp,int withattr)2911 static int message_read_params(struct protstream *strm, struct param **paramp,
2912 int withattr)
2913 {
2914 int c;
2915
2916 if ((c = prot_getc(strm)) == '(') {
2917 /* parse list */
2918 struct param *param;
2919
2920 do {
2921 *paramp = param = (struct param *) xzmalloc(sizeof(struct param));
2922
2923 if (withattr) {
2924 /* attribute */
2925 c = message_read_nstring(strm, ¶m->attribute, 1);
2926 }
2927
2928 /* value */
2929 c = message_read_nstring(strm, ¶m->value, 1);
2930
2931 /* get ready to append the next parameter */
2932 paramp = ¶m->next;
2933
2934 } while (c == ' ');
2935
2936 if (c == ')') c = prot_getc(strm);
2937 }
2938 else {
2939 /* NIL */
2940 prot_ungetc(c, strm);
2941 c = message_read_nstring(strm, NULL, 0);
2942 }
2943
2944 return c;
2945 }
2946
2947 /*
2948 * Read an address part from cached bodystructure.
2949 * The string is appended to 'buf' (including NUL).
2950 */
message_read_addrpart(struct protstream * strm,const char ** part,unsigned * off,struct buf * buf)2951 static int message_read_addrpart(struct protstream *strm,
2952 const char **part, unsigned *off, struct buf *buf)
2953 {
2954 int c;
2955
2956 c = message_read_nstring(strm, (char **)part, 0);
2957 if (*part) {
2958 *off = buf->len;
2959 buf_appendmap(buf, *part, strlen(*part)+1);
2960 }
2961
2962 return c;
2963 }
2964
2965 /*
2966 * Read an address list from cached bodystructure.
2967 * Analog to message_write_address()
2968 */
message_read_address(struct protstream * strm,struct address ** addrp)2969 static int message_read_address(struct protstream *strm, struct address **addrp)
2970 {
2971 int c;
2972
2973 if ((c = prot_getc(strm)) == '(') {
2974 /* parse list */
2975 struct address *addr;
2976 unsigned nameoff = 0, rtoff = 0, mboxoff = 0, domoff = 0;
2977
2978 do {
2979 struct buf buf = BUF_INITIALIZER;
2980 *addrp = addr = (struct address *) xzmalloc(sizeof(struct address));
2981
2982 /* opening '(' */
2983 c = prot_getc(strm);
2984
2985 /* name */
2986 c = message_read_addrpart(strm, &addr->name, &nameoff, &buf);
2987
2988 /* route */
2989 c = message_read_addrpart(strm, &addr->route, &rtoff, &buf);
2990
2991 /* mailbox */
2992 c = message_read_addrpart(strm, &addr->mailbox, &mboxoff, &buf);
2993
2994 /* host */
2995 c = message_read_addrpart(strm, &addr->domain, &domoff, &buf);
2996
2997 /* addr parts must now point into our freeme string */
2998 if (buf.len) {
2999 char *freeme = addr->freeme = buf_release(&buf);
3000
3001 if (addr->name) addr->name = freeme+nameoff;
3002 if (addr->route) addr->route = freeme+rtoff;
3003 if (addr->mailbox) addr->mailbox = freeme+mboxoff;
3004 if (addr->domain) addr->domain = freeme+domoff;
3005 }
3006
3007 buf_free(&buf);
3008
3009 /* get ready to append the next address */
3010 addrp = &addr->next;
3011
3012 } while (((c = prot_getc(strm)) == '(') && prot_ungetc(c, strm));
3013
3014 if (c == ')') c = prot_getc(strm);
3015 }
3016 else {
3017 /* NIL */
3018 prot_ungetc(c, strm);
3019 c = message_read_nstring(strm, NULL, 0);
3020 }
3021
3022 return c;
3023 }
3024
3025 /*
3026 * Read a cached envelope response.
3027 * Analog to message_write_envelope()
3028 */
message_read_envelope(struct protstream * strm,struct body * body)3029 static int message_read_envelope(struct protstream *strm, struct body *body)
3030 {
3031 int c;
3032
3033 /* opening '(' */
3034 c = prot_getc(strm);
3035
3036 /* date */
3037 c = message_read_nstring(strm, &body->date, 1);
3038
3039 /* subject */
3040 c = message_read_nstring(strm, &body->subject, 1);
3041
3042 /* from */
3043 c = message_read_address(strm, &body->from);
3044
3045 /* sender */
3046 c = message_read_address(strm, &body->sender);
3047
3048 /* reply-to */
3049 c = message_read_address(strm, &body->reply_to);
3050
3051 /* to */
3052 c = message_read_address(strm, &body->to);
3053
3054 /* cc */
3055 c = message_read_address(strm, &body->cc);
3056
3057 /* bcc */
3058 c = message_read_address(strm, &body->bcc);
3059
3060 /* in-reply-to */
3061 c = message_read_nstring(strm, &body->in_reply_to, 1);
3062
3063 /* message-id */
3064 c = message_read_nstring(strm, &body->message_id, 1);
3065
3066 if (c == ')') c = prot_getc(strm);
3067
3068 return c;
3069 }
3070
3071 /*
3072 * Read cached bodystructure response.
3073 * Analog to message_write_body()
3074 */
message_read_body(struct protstream * strm,struct body * body,const char * part_id)3075 static int message_read_body(struct protstream *strm, struct body *body, const char *part_id)
3076 {
3077 int c;
3078 struct buf buf = BUF_INITIALIZER;
3079
3080 /* opening '(' */
3081 c = prot_getc(strm);
3082 if (c == EOF) goto done;
3083
3084 /* check for multipart */
3085 if ((c = prot_peek(strm)) == '(') {
3086
3087 body->type = xstrdup("MULTIPART");
3088 do {
3089 body->subpart =
3090 (struct body *)xrealloc((char *)body->subpart,
3091 (body->numparts+1)*sizeof(struct body));
3092 memset(&body->subpart[body->numparts], 0, sizeof(struct body));
3093 buf_reset(&buf);
3094 if (part_id) buf_printf(&buf, "%s.", part_id);
3095 buf_printf(&buf, "%d", body->numparts + 1);
3096 struct body *subbody = &body->subpart[body->numparts++];
3097 subbody->part_id = buf_release(&buf);
3098 c = message_read_body(strm, subbody, subbody->part_id);
3099 } while (((c = prot_getc(strm)) == '(') && prot_ungetc(c, strm));
3100
3101 /* remove the part_id here, you can't address multiparts directly */
3102 free(body->part_id);
3103 body->part_id = NULL;
3104
3105 /* body subtype */
3106 c = message_read_nstring(strm, &body->subtype, 1);
3107 if (c == EOF) goto done;
3108
3109 /* extension data */
3110
3111 /* body parameters */
3112 c = message_read_params(strm, &body->params, 1);
3113 if (c == EOF) goto done;
3114 }
3115 else {
3116 if (!body->part_id) {
3117 buf_reset(&buf);
3118 if (part_id) buf_printf(&buf, "%s.", part_id);
3119 buf_printf(&buf, "%d", 1);
3120 body->part_id = buf_release(&buf);
3121 }
3122 /* non-multipart */
3123
3124 /* body type */
3125 c = message_read_nstring(strm, &body->type, 1);
3126 if (c == EOF) goto done;
3127
3128 /* body subtype */
3129 c = message_read_nstring(strm, &body->subtype, 1);
3130 if (c == EOF) goto done;
3131
3132 /* body parameters */
3133 c = message_read_params(strm, &body->params, 1);
3134 if (c == EOF) goto done;
3135
3136 /* body id */
3137 c = message_read_nstring(strm, &body->id, 1);
3138 if (c == EOF) goto done;
3139
3140 /* body description */
3141 c = message_read_nstring(strm, &body->description, 1);
3142 if (c == EOF) goto done;
3143
3144 /* body encoding */
3145 c = message_read_nstring(strm, &body->encoding, 1);
3146 if (c == EOF) goto done;
3147
3148 /* body size */
3149 c = getuint32(strm, &body->content_size);
3150 if (c == EOF) goto done;
3151
3152 if (!strcmp(body->type, "TEXT")) {
3153 /* body lines */
3154 c = getint32(strm, (int32_t *) &body->content_lines);
3155 if (c == EOF) goto done;
3156 }
3157 else if (!strcmp(body->type, "MESSAGE") &&
3158 !strcmp(body->subtype, "RFC822")) {
3159
3160 body->subpart = (struct body *) xzmalloc(sizeof(struct body));
3161
3162 /* envelope structure */
3163 c = message_read_envelope(strm, body->subpart);
3164 if (c == EOF) goto done;
3165
3166 /* body structure */
3167 c = message_read_body(strm, body->subpart, body->part_id);
3168 if (c == EOF) goto done;
3169 c = prot_getc(strm); /* trailing SP */
3170 if (c == EOF) goto done;
3171
3172 /* body lines */
3173 c = getint32(strm, (int32_t *) &body->content_lines);
3174 if (c == EOF) goto done;
3175 }
3176
3177 /* extension data */
3178
3179 /* body MD5 */
3180 c = message_read_nstring(strm, &body->md5, 1);
3181 if (c == EOF) goto done;
3182 }
3183
3184 /* common extension data */
3185
3186 /* body disposition */
3187 if ((c = prot_getc(strm)) == '(') {
3188 c = message_read_nstring(strm, &body->disposition, 1);
3189 if (c == EOF) goto done;
3190
3191 c = message_read_params(strm, &body->disposition_params, 1);
3192 if (c == ')') c = prot_getc(strm); /* trailing SP */
3193 if (c == EOF) goto done;
3194 }
3195 else {
3196 /* NIL */
3197 prot_ungetc(c, strm);
3198 c = message_read_nstring(strm, &body->disposition, 1);
3199 if (c == EOF) goto done;
3200 }
3201
3202 /* body language */
3203 if ((c = prot_peek(strm)) == '(') {
3204 c = message_read_params(strm, &body->language, 0);
3205 if (c == EOF) goto done;
3206 }
3207 else {
3208 char *lang;
3209
3210 c = message_read_nstring(strm, &lang, 1);
3211 if (c == EOF) goto done;
3212 if (lang) {
3213 body->language = (struct param *) xzmalloc(sizeof(struct param));
3214 body->language->value = lang;
3215 }
3216 }
3217
3218 /* body location */
3219 c = message_read_nstring(strm, &body->location, 1);
3220
3221 /* XXX We currently don't store any other extension data.
3222 MUST keep in sync with message_write_body() */
3223
3224 done:
3225 buf_free(&buf);
3226 return c;
3227 }
3228
3229 /*
3230 * Read cached binary bodystructure.
3231 * Analog to message_write_section()
3232 */
message_read_binarybody(struct body * body,const char ** sect,uint32_t cache_version)3233 static void message_read_binarybody(struct body *body, const char **sect,
3234 uint32_t cache_version)
3235 {
3236 bit32 n, i;
3237 const char *p = *sect;
3238 struct body *subpart;
3239 size_t len;
3240 uint32_t cte;
3241
3242 n = CACHE_ITEM_BIT32(*sect);
3243 p = *sect += CACHE_ITEM_SIZE_SKIP;
3244 if (!n) return;
3245
3246 if (!strcmp(body->type, "MESSAGE") && !strcmp(body->subtype, "RFC822") &&
3247 body->subpart->numparts) {
3248 subpart = body->subpart->subpart;
3249 body = body->subpart;
3250 }
3251 else {
3252 /* If a message/rfc822 contains a non-multipart,
3253 we don't care about part 0 (message header) */
3254 subpart = body->subpart;
3255 body = NULL;
3256 }
3257
3258 if (!body) {
3259 /* skip header part */
3260 p += 5 * CACHE_ITEM_SIZE_SKIP;
3261 if (cache_version >= 5)
3262 p += MESSAGE_GUID_SIZE;
3263 if (cache_version >= 8)
3264 p += CACHE_ITEM_SIZE_SKIP;
3265 if (cache_version >= 9)
3266 p += CACHE_ITEM_SIZE_SKIP;
3267 }
3268 else {
3269 /* read header part */
3270 body->header_offset = CACHE_ITEM_BIT32(p);
3271 p += CACHE_ITEM_SIZE_SKIP;
3272 body->header_size = CACHE_ITEM_BIT32(p);
3273 p += CACHE_ITEM_SIZE_SKIP;
3274 body->content_offset = CACHE_ITEM_BIT32(p);
3275 p += CACHE_ITEM_SIZE_SKIP;
3276 body->content_size = CACHE_ITEM_BIT32(p);
3277 p += CACHE_ITEM_SIZE_SKIP;
3278 cte = CACHE_ITEM_BIT32(p);
3279 p += CACHE_ITEM_SIZE_SKIP;
3280
3281 /* read encoding and charset identifier */
3282 /* Cache versions <= 3 store charset and encoding in 4 bytes,
3283 * but the code was broken. Just presume the charset unknown. */
3284 body->charset_enc = cte & 0xff;
3285 body->charset_id = NULL;
3286 if (cache_version >= 4) {
3287 /* determine the length of the charset identifer */
3288 len = (cte >> 16) & 0xffff;
3289 if (len) {
3290 /* XXX - assert (cte & 0xff00) == 0x100 */
3291 /* read len bytes as charset id */
3292 body->charset_id = xstrndup(p, len);
3293 p += len;
3294 }
3295 }
3296 if (cache_version >= 5)
3297 p = message_guid_import(&body->content_guid, p);
3298
3299 if (cache_version >= 8) {
3300 body->decoded_content_size = CACHE_ITEM_BIT32(p);
3301 p += CACHE_ITEM_SIZE_SKIP;
3302 }
3303 if (cache_version >= 9) {
3304 body->content_lines = CACHE_ITEM_BIT32(p);
3305 p += CACHE_ITEM_SIZE_SKIP;
3306 }
3307 }
3308
3309 /* read body parts */
3310 for (i = 0; i < n-1; i++) {
3311 subpart[i].header_offset = CACHE_ITEM_BIT32(p);
3312 p += CACHE_ITEM_SIZE_SKIP;
3313 subpart[i].header_size = CACHE_ITEM_BIT32(p);
3314 p += CACHE_ITEM_SIZE_SKIP;
3315 subpart[i].content_offset = CACHE_ITEM_BIT32(p);
3316 p += CACHE_ITEM_SIZE_SKIP;
3317 subpart[i].content_size = CACHE_ITEM_BIT32(p);
3318 p += CACHE_ITEM_SIZE_SKIP;
3319 cte = CACHE_ITEM_BIT32(p);
3320 p += CACHE_ITEM_SIZE_SKIP;
3321
3322 /* read encoding and charset identifier */
3323 /* Cache versions <= 3 store charset and encoding in 4 bytes,
3324 * but the code was broken. Just presume the charset unknown. */
3325 subpart[i].charset_enc = cte & 0xff;
3326 subpart[i].charset_id = NULL;
3327 if (cache_version >= 4) {
3328 /* determine the length of the charset identifer */
3329 len = (cte >> 16) & 0xffff;
3330 if (len) {
3331 /* XXX - assert (cte & 0xff00) == 0x100 */
3332 /* read len bytes as charset id */
3333 subpart[i].charset_id = xstrndup(p, len);
3334 p += len;
3335 }
3336 }
3337 if (cache_version >= 5)
3338 p = message_guid_import(&subpart[i].content_guid, p);
3339
3340 if (cache_version >= 8) {
3341 subpart[i].decoded_content_size = CACHE_ITEM_BIT32(p);
3342 p += CACHE_ITEM_SIZE_SKIP;
3343 }
3344 if (cache_version >= 9) {
3345 subpart[i].content_lines = CACHE_ITEM_BIT32(p);
3346 p += CACHE_ITEM_SIZE_SKIP;
3347 }
3348 }
3349
3350 /* read sub-parts */
3351 for (*sect = p, i = 0; i < n-1; i++) {
3352 message_read_binarybody(&subpart[i], sect, cache_version);
3353 }
3354 }
3355
3356 /*
3357 * Read cached envelope, binary bodystructure response and binary bodystructure
3358 * of the specified record. Populates 'body' which must be freed by the caller.
3359 */
message_read_bodystructure(const struct index_record * record,struct body ** body)3360 EXPORTED void message_read_bodystructure(const struct index_record *record, struct body **body)
3361 {
3362 struct protstream *strm;
3363 struct body toplevel;
3364 const char *binbody;
3365
3366 memset(&toplevel, 0, sizeof(struct body));
3367 toplevel.type = "MESSAGE";
3368 toplevel.subtype = "RFC822";
3369 toplevel.subpart = *body = xzmalloc(sizeof(struct body));
3370
3371 /* Read envelope response from cache */
3372 strm = prot_readmap(cacheitem_base(record, CACHE_ENVELOPE),
3373 cacheitem_size(record, CACHE_ENVELOPE));
3374 prot_setisclient(strm, 1); /* no-sync literals */
3375
3376 message_read_envelope(strm, *body);
3377 prot_free(strm);
3378
3379 /* Read bodystructure response from cache */
3380 strm = prot_readmap(cacheitem_base(record, CACHE_BODYSTRUCTURE),
3381 cacheitem_size(record, CACHE_BODYSTRUCTURE));
3382 prot_setisclient(strm, 1); /* no-sync literals */
3383
3384 message_read_body(strm, *body, NULL);
3385 prot_free(strm);
3386
3387 /* Read binary bodystructure from cache */
3388 binbody = cacheitem_base(record, CACHE_SECTION);
3389 message_read_binarybody(&toplevel, &binbody, record->cache_version);
3390 }
3391
de_nstring_buf(struct buf * src,struct buf * dst)3392 static void de_nstring_buf(struct buf *src, struct buf *dst)
3393 {
3394 char *p, *q;
3395
3396 if (src->s && src->len == 3 && !memcmp(src->s, "NIL", 3)) {
3397 buf_free(dst);
3398 return;
3399 }
3400 buf_cstring(src); /* ensure nstring parse doesn't overrun */
3401 q = src->s;
3402 p = parse_nstring(&q);
3403 buf_setmap(dst, p, q-p);
3404 buf_cstring(dst);
3405 }
3406
message1_get_subject(const struct index_record * record,struct buf * buf)3407 static void message1_get_subject(const struct index_record *record, struct buf *buf)
3408 {
3409 struct buf tmp = BUF_INITIALIZER;
3410 buf_copy(&tmp, cacheitem_buf(record, CACHE_SUBJECT));
3411 de_nstring_buf(&tmp, buf);
3412 buf_free(&tmp);
3413 }
3414
3415 /*
3416 * Generate a conversation id from the given message.
3417 * The conversation id is derived from the first 64b of
3418 * the SHA1 of the message, except that an all-zero
3419 * conversation id is not valid.
3420 */
generate_conversation_id(const struct index_record * record)3421 static conversation_id_t generate_conversation_id(
3422 const struct index_record *record)
3423 {
3424 conversation_id_t cid = 0;
3425 size_t i;
3426
3427 assert(record->guid.status == GUID_NONNULL);
3428
3429 for (i = 0 ; i < sizeof(cid) ; i++) {
3430 cid <<= 8;
3431 cid |= record->guid.value[i];
3432 }
3433
3434 // we make sure the cid doesn't look anything like the sha1 so
3435 // that people don't make assumptions
3436 cid ^= 0x91f3d9e10b690b12; // chosen by fair dice roll
3437
3438 /*
3439 * We carefully avoid returning NULLCONVERSATION as
3440 * a new cid, as that would confuse matters no end.
3441 */
3442 if (cid == NULLCONVERSATION)
3443 cid = 1;
3444
3445 return cid;
3446 }
3447
/*
 * In RFC 2822, the In-Reply-To field is explicitly required to contain
 * only message-ids, whitespace and commas.  The old RFC 822 was less
 * well specified and allowed all sorts of stuff.  We used to be equally
 * liberal here in parsing the field.  Sadly some versions of the NMH
 * mailer will generate In-Reply-To containing email addresses which we
 * cannot tell from message-ids, leading to massively confused
 * threading.  So we have to be slightly stricter.
 *
 * Returns 1 if 'p' is NULL, or if - after any leading whitespace and
 * commas - it is empty or starts with '<'; returns 0 otherwise.
 */
static int is_valid_rfc2822_inreplyto(const char *p)
{
    if (!p)
        return 1;

    /* skip any whitespace and commas.  The cast matters: passing a
     * negative plain char (any byte >= 0x80 where char is signed)
     * to isspace() is undefined behaviour. */
    while (*p && (isspace((unsigned char) *p) || *p == ','))
        p++;

    return (!*p || *p == '<');
}
3468
3469 /* XXX - refactor this whole thing to an "open or create" API */
getconvmailbox(const char * mboxname,struct mailbox ** mailboxptr)3470 static int getconvmailbox(const char *mboxname, struct mailbox **mailboxptr)
3471 {
3472 int r = mailbox_open_iwl(mboxname, mailboxptr);
3473 if (r != IMAP_MAILBOX_NONEXISTENT) return r;
3474
3475 struct mboxlock *namespacelock = mboxname_usernamespacelock(mboxname);
3476
3477 // try again - maybe we lost the race!
3478 r = mailbox_open_iwl(mboxname, mailboxptr);
3479 if (r == IMAP_MAILBOX_NONEXISTENT) {
3480 /* create the mailbox - it's OK to do as admin because this only ever gets
3481 * a user subfolder for this conversations.db owner */
3482 r = mboxlist_createmailbox(mboxname, MBTYPE_COLLECTION, NULL, 1 /* admin */, NULL, NULL,
3483 0, 0, 0, 0, mailboxptr);
3484 }
3485
3486 mboxname_release(&namespacelock);
3487
3488 return r;
3489 }
3490
3491 /*
3492 * This is the legacy code version to generate conversation subjects.
3493 * We keep it here to allow matching messages to conversations that
3494 * already got that oldstyle subject set.
3495 */
3496 /*
 * Normalise a subject string, to a form which can be used for deciding
 * whether a message belongs in the same conversation as its antecedent
 * messages.  What we're doing here is the same idea as the "base
3500 * subject" algorithm described in RFC 5256 but slightly adapted from
3501 * experience. Differences are:
3502 *
3503 * - We eliminate all whitespace; RFC 5256 normalises any sequence
3504 * of whitespace characters to a single SP. We do this because
3505 * we have observed combinations of buggy client software both
3506 * add and remove whitespace around folding points.
3507 *
3508 * - We include the Unicode U+00A0 (non-breaking space) codepoint in our
3509 * determination of whitespace (as the UTF-8 sequence \xC2\xA0) because
3510 * we have seen it in the wild, but do not currently generalise this to
3511 * other Unicode "whitespace" codepoints. (XXX)
3512 *
3513 * - Because we eliminate whitespace entirely, and whitespace helps
3514 * delimit some of our other replacements, we do that whitespace
3515 * step last instead of first.
3516 *
3517 * - We eliminate leading tokens like Re: and Fwd: using a simpler
3518 * and more generic rule than RFC 5256's; this rule catches a number
3519 * of semantically identical prefixes in other human languages, but
3520 * unfortunately also catches lots of other things. We think we can
3521 * get away with this because the normalised subject is never directly
3522 * seen by human eyes, so some information loss is acceptable as long
3523 * as the subjects in different messages match correctly.
3524 *
3525 * - We eliminate trailing tokens like [SEC=UNCLASSIFIED],
3526 * [DLM=Sensitive], etc which are automatically added by Australian
3527 * Government department email systems. In theory there should be no
3528 * more than one of these on an email subject but in practice multiple
3529 * have been seen.
3530 * http://www.finance.gov.au/files/2012/04/EPMS2012.3.pdf
3531 */
static void oldstyle_normalise_subject(struct buf *s)
{
    /* the patterns are compiled once, on first call, and kept for the
     * lifetime of the process */
    static int compiled = 0;
    static regex_t ws_re;
    static regex_t prefix_re;
    static regex_t lead_blob_re;
    static regex_t trail_blob_re;

    if (!compiled) {
        const struct {
            regex_t *re;
            const char *pattern;
        } table[] = {
            { &ws_re,         "([ \t\r\n]+|\xC2\xA0)" },
            { &prefix_re,     "^[ \t]*[A-Za-z0-9]+(\\[[0-9]+\\])?:" },
            { &lead_blob_re,  "^[ \t]*\\[[^]]+\\]" },
            { &trail_blob_re, "\\[(SEC|DLM)=[^]]+\\][ \t]*$" },
        };
        size_t k;

        for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
            int r = regcomp(table[k].re, table[k].pattern, REG_EXTENDED);
            assert(r == 0);
        }
        compiled = 1;
    }

    /* step 1 is to decode any RFC 2047 MIME encoding of the header
     * field, but we assume that has already happened */

    /* step 2 is to eliminate all "Re:"-like tokens and [] blobs
     * at the start, and AusGov [] blobs at the end.  After every
     * successful removal start again from the first pattern; stop
     * once none of the three matches. */
    for (;;) {
        if (buf_replace_one_re(s, &prefix_re, NULL)) continue;
        if (buf_replace_one_re(s, &lead_blob_re, NULL)) continue;
        if (buf_replace_one_re(s, &trail_blob_re, NULL)) continue;
        break;
    }

    /* step 3 is eliminating whitespace. */
    buf_replace_all_re(s, &ws_re, NULL);
}
3566
extract_convsubject(const struct index_record * record,struct buf * msubject,void (* normalise)(struct buf *))3567 static void extract_convsubject(const struct index_record *record,
3568 struct buf *msubject,
3569 void (*normalise)(struct buf*))
3570 {
3571 if (cacheitem_base(record, CACHE_HEADERS)) {
3572 message1_get_subject(record, msubject);
3573 normalise(msubject);
3574 }
3575 }
3576
message_extract_convsubject(const struct index_record * record)3577 EXPORTED char *message_extract_convsubject(const struct index_record *record)
3578 {
3579 if (cacheitem_base(record, CACHE_HEADERS)) {
3580 struct buf msubject = BUF_INITIALIZER;
3581 extract_convsubject(record, &msubject, conversation_normalise_subject);
3582 return buf_release(&msubject);
3583 }
3584 return NULL;
3585 }
3586
3587 /*
3588 * Update the conversations database for the given
3589 * mailbox, to account for the given message.
3590 * @body may be NULL, in which case we get everything
3591 * we need out of the cache item in @record.
3592 */
message_update_conversations(struct conversations_state * state,struct mailbox * mailbox,struct index_record * record,conversation_t ** convp)3593 EXPORTED int message_update_conversations(struct conversations_state *state,
3594 struct mailbox *mailbox,
3595 struct index_record *record,
3596 conversation_t **convp)
3597 {
3598 char *hdrs[4];
3599 char *c_refs = NULL, *c_env = NULL, *c_me_msgid = NULL;
3600 strarray_t msgidlist = STRARRAY_INITIALIZER;
3601 arrayu64_t matchlist = ARRAYU64_INITIALIZER;
3602 arrayu64_t cids = ARRAYU64_INITIALIZER;
3603 int mustkeep = 0;
3604 conversation_t *conv = NULL;
3605 char *msubj = NULL;
3606 char *msubj_oldstyle = NULL;
3607 int i;
3608 int j;
3609 int r = 0;
3610 struct mailbox *local_mailbox = NULL;
3611
3612 /*
3613 * Gather all the msgids mentioned in the message, starting with
3614 * the oldest message in the References: header, then any mesgids
3615 * mentioned in the In-Reply-To: header, and finally the message's
3616 * own Message-Id:. In general this will result in duplicates (a
3617 * correct References: header will contain as its last entry the
3618 * msgid in In-Reply-To:), so we weed those out before proceeding
3619 * to the database.
3620 */
3621 if (cacheitem_base(record, CACHE_HEADERS)) {
3622 /* we have cache loaded, get what we need there */
3623 strarray_t want = STRARRAY_INITIALIZER;
3624 char *envtokens[NUMENVTOKENS];
3625
3626 /* get References from cached headers */
3627 c_refs = xstrndup(cacheitem_base(record, CACHE_HEADERS),
3628 cacheitem_size(record, CACHE_HEADERS));
3629 strarray_append(&want, "references");
3630 message_pruneheader(c_refs, &want, 0);
3631 hdrs[0] = c_refs;
3632
3633 /* get In-Reply-To, Message-ID out of the envelope
3634 *
3635 * get a working copy; strip outer ()'s
3636 * +1 -> skip the leading paren
3637 * -2 -> don't include the size of the outer parens
3638 */
3639 c_env = xstrndup(cacheitem_base(record, CACHE_ENVELOPE) + 1,
3640 cacheitem_size(record, CACHE_ENVELOPE) - 2);
3641 parse_cached_envelope(c_env, envtokens, NUMENVTOKENS);
3642 hdrs[1] = envtokens[ENV_INREPLYTO];
3643 hdrs[2] = envtokens[ENV_MSGID];
3644
3645 /* get X-ME-Message-ID from cached headers */
3646 c_me_msgid = xstrndup(cacheitem_base(record, CACHE_HEADERS),
3647 cacheitem_size(record, CACHE_HEADERS));
3648 strarray_set(&want, 0, "x-me-message-id");
3649 message_pruneheader(c_me_msgid, &want, 0);
3650 hdrs[3] = c_me_msgid;
3651
3652 strarray_fini(&want);
3653
3654 /* work around stupid message_guid API */
3655 message_guid_isnull(&record->guid);
3656 }
3657 else {
3658 /* nope, now we're screwed */
3659 return IMAP_INTERNAL;
3660 }
3661
3662 if (!is_valid_rfc2822_inreplyto(hdrs[1]))
3663 hdrs[1] = NULL;
3664
3665 /* Note that a NULL subject, e.g. due to a missing Subject: header
3666 * field in the original message, is normalised to "" not NULL */
3667 if (cacheitem_base(record, CACHE_HEADERS)) {
3668 struct buf msubject = BUF_INITIALIZER;
3669 extract_convsubject(record, &msubject, conversation_normalise_subject);
3670 msubj = xstrdup(buf_cstring(&msubject));
3671 buf_reset(&msubject);
3672 extract_convsubject(record, &msubject, oldstyle_normalise_subject);
3673 msubj_oldstyle = buf_release(&msubject);
3674 }
3675
3676 for (i = 0 ; i < 4 ; i++) {
3677 int hcount = 0;
3678 char *msgid = NULL;
3679 while ((msgid = find_msgid(hdrs[i], &hdrs[i])) != NULL) {
3680 hcount++;
3681 if (hcount > 20) {
3682 free(msgid);
3683 syslog(LOG_DEBUG, "too many references, skipping the rest");
3684 break;
3685 }
3686 /*
3687 * The issue of case sensitivity of msgids is curious.
3688 * RFC 2822 seems to imply they're case-insensitive,
3689 * without explicitly stating so. So here we punt
3690 * on that being the case.
3691 *
3692 * Note that the THREAD command elsewhere in Cyrus
3693 * assumes otherwise.
3694 */
3695 msgid = lcase(msgid);
3696
3697 /* already seen this one? */
3698 if (strarray_find(&msgidlist, msgid, 0) >= 0) {
3699 free(msgid);
3700 continue;
3701 }
3702
3703 /* won't be accepted as valid, ignore it! */
3704 if (conversations_check_msgid(msgid, strlen(msgid))) {
3705 free(msgid);
3706 continue;
3707 }
3708
3709 strarray_appendm(&msgidlist, msgid);
3710
3711 /* Lookup the conversations database to work out which
3712 * conversation ids that message belongs to. */
3713 r = conversations_get_msgid(state, msgid, &cids);
3714 if (r) goto out;
3715
3716 for (j = 0; j < cids.count; j++) {
3717 conversation_id_t cid = arrayu64_nth(&cids, j);
3718 conversation_free(conv);
3719 conv = NULL;
3720 r = conversation_load(state, cid, &conv);
3721 if (r) goto out;
3722 /* [IRIS-1576] if X-ME-Message-ID says the messages are
3723 * linked, ignore any difference in Subject: header fields. */
3724 if (!conv || i == 3 || !conv->subject ||
3725 !strcmpsafe(conv->subject, msubj) ||
3726 !strcmpsafe(conv->subject, msubj_oldstyle)) {
3727 arrayu64_add(&matchlist, cid);
3728 }
3729 }
3730
3731 conversation_free(conv);
3732 conv = NULL;
3733 }
3734 }
3735
3736 /* calculate the CID if needed */
3737 if (!record->silentupdate) {
3738 /* match for GUID, it always has the same CID */
3739 conversation_id_t currentcid = conversations_guid_cid_lookup(state, message_guid_encode(&record->guid));
3740 if (currentcid) {
3741 // would love to have this, but might hit bogus broken existing data...
3742 // assert(record->cid == 0 || record->cid == currentcid);
3743 record->cid = currentcid;
3744 mustkeep = 1;
3745 }
3746 if (!record->cid) record->cid = arrayu64_max(&matchlist);
3747 if (!record->cid) {
3748 record->cid = generate_conversation_id(record);
3749 if (record->cid) mustkeep = 1;
3750 }
3751 if (!mustkeep && !record->basecid) {
3752 /* try finding a CID in the match list, or if we came in with it */
3753 struct buf annotkey = BUF_INITIALIZER;
3754 struct buf annotval = BUF_INITIALIZER;
3755 buf_printf(&annotkey, "%snewcid/%016llx", IMAP_ANNOT_NS, record->cid);
3756 r = annotatemore_lookup(state->annotmboxname, buf_cstring(&annotkey), "", &annotval);
3757 if (annotval.len == 16) {
3758 const char *p = buf_cstring(&annotval);
3759 /* we have a new canonical CID */
3760 record->basecid = record->cid;
3761 r = parsehex(p, &p, 16, &record->cid);
3762 }
3763 else {
3764 r = 0; /* we're just going to pretend this wasn't found, worst case we split
3765 * more than we should */
3766 }
3767 buf_free(&annotkey);
3768 buf_free(&annotval);
3769 if (r) goto out;
3770 }
3771 }
3772
3773 if (!record->cid) goto out;
3774 if (!record->basecid) record->basecid = record->cid;
3775
3776 r = conversation_load(state, record->cid, &conv);
3777 if (r) goto out;
3778
3779 if (!conv) conv = conversation_new();
3780
3781 uint32_t max_thread = config_getint(IMAPOPT_CONVERSATIONS_MAX_THREAD);
3782 if (conv->exists >= max_thread && !mustkeep && !record->silentupdate) {
3783 /* time to reset the conversation */
3784 conversation_id_t was = record->cid;
3785 record->cid = generate_conversation_id(record);
3786
3787 syslog(LOG_NOTICE, "splitting conversation for %s %u base:%016llx was:%016llx now:%016llx",
3788 mailbox->name, record->uid, record->basecid, was, record->cid);
3789
3790 if (!record->basecid) record->basecid = was;
3791
3792 conversation_free(conv);
3793 r = conversation_load(state, record->cid, &conv);
3794 if (r) goto out;
3795 if (!conv) conv = conversation_new();
3796
3797 /* and update the pointer for next time */
3798 if (strcmpsafe(state->annotmboxname, mailbox->name)) {
3799 r = getconvmailbox(state->annotmboxname, &local_mailbox);
3800 if (r) goto out;
3801 mailbox = local_mailbox;
3802 }
3803
3804 struct annotate_state *astate = NULL;
3805 r = mailbox_get_annotate_state(mailbox, 0, &astate);
3806 if (r) goto out;
3807
3808 struct buf annotkey = BUF_INITIALIZER;
3809 struct buf annotval = BUF_INITIALIZER;
3810 buf_printf(&annotkey, "%snewcid/%016llx", IMAP_ANNOT_NS, record->basecid);
3811 buf_printf(&annotval, "%016llx", record->cid);
3812 r = annotate_state_write(astate, buf_cstring(&annotkey), "", &annotval);
3813 buf_free(&annotkey);
3814 buf_free(&annotval);
3815 if (r) goto out;
3816 }
3817
3818 /* Create the subject header if not already set and this isn't a Draft */
3819 if (!conv->subject && !(record->system_flags & FLAG_DRAFT))
3820 conv->subject = xstrdupnull(msubj);
3821
3822 /*
3823 * Update the database to add records for all the message-ids
3824 * not already mentioned. Note that add_msgid does the right
3825 * thing[tm] when the cid already exists.
3826 */
3827
3828 for (i = 0 ; i < msgidlist.count ; i++) {
3829 r = conversations_add_msgid(state, strarray_nth(&msgidlist, i), record->basecid);
3830 if (r) goto out;
3831 }
3832
3833 /* mark that it's split so basecid gets saved */
3834 if (record->basecid != record->cid)
3835 record->internal_flags |= FLAG_INTERNAL_SPLITCONVERSATION;
3836
3837 out:
3838 strarray_fini(&msgidlist);
3839 arrayu64_fini(&matchlist);
3840 arrayu64_fini(&cids);
3841 free(c_refs);
3842 free(c_env);
3843 free(c_me_msgid);
3844 free(msubj);
3845 free(msubj_oldstyle);
3846 if (local_mailbox)
3847 mailbox_close(&local_mailbox);
3848
3849 if (r)
3850 conversation_free(conv);
3851 else if (convp)
3852 *convp = conv;
3853 else {
3854 r = conversation_save(state, record->cid, conv);
3855 conversation_free(conv);
3856 }
3857
3858 return r;
3859 }
3860
3861
/*
  Format of the CACHE_SECTION cache item is a binary encoding of the
  tree of MIME sections.  In something like rpcgen notation
  (see RFC 4506):

    struct part {
        uint32_t header_offset;
        uint32_t header_size;
        uint32_t content_offset;
        uint32_t content_size;

        uint32_t encoding | 0x100 | (len << 16)
                 where len is the length of the charset identifier
                 in bytes (cache version >= 4)
        uint8_t[len] charset identifier

        Later cache versions append further per-part fields, in the
        order read by message_read_binarybody(): the content GUID
        (version >= 5), the decoded content size (version >= 8) and
        the content line count (version >= 9).
    };

    struct section {
        unsigned int numparts;
        struct part parts[numparts];
        struct section[numparts-1];
    };
*/
3884
3885 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
3886
message_new(void)3887 EXPORTED message_t *message_new(void)
3888 {
3889 message_t *m = xzmalloc(sizeof(*m));
3890
3891 m->refcount = 1;
3892
3893 return m;
3894 }
3895
/*
 * Release a message whose reference count has dropped to zero,
 * together with every resource it still owns.
 */
static void message_free(message_t *m)
{
    assert(m->refcount == 0);

    message_yield(m, M_ALL);

    /* message_yield() deliberately keeps "given" resources, so the
     * private filename copy made by message_new_from_filename() is
     * still attached here and would leak.  If the filename was
     * yielded (or never set) the pointer is NULL and this is a no-op. */
    free(m->filename);

    free(m);
}
3904
message_set_from_data(const char * base,size_t len,message_t * m)3905 EXPORTED void message_set_from_data(const char *base, size_t len, message_t *m)
3906 {
3907 assert(m->refcount == 1);
3908 message_yield(m, M_ALL);
3909 memset(m, 0, sizeof(message_t));
3910 buf_init_ro(&m->map, base, len);
3911 m->have = m->given = M_MAP;
3912 m->refcount = 1;
3913 }
3914
message_new_from_data(const char * base,size_t len)3915 EXPORTED message_t *message_new_from_data(const char *base, size_t len)
3916 {
3917 message_t *m = message_new();
3918 buf_init_ro(&m->map, base, len);
3919 m->have = m->given = M_MAP;
3920 return m;
3921 }
3922
message_set_from_mailbox(struct mailbox * mailbox,unsigned int recno,message_t * m)3923 EXPORTED void message_set_from_mailbox(struct mailbox *mailbox, unsigned int recno, message_t *m)
3924 {
3925 assert(m->refcount == 1);
3926 message_yield(m, M_ALL);
3927 memset(m, 0, sizeof(message_t));
3928 m->mailbox = mailbox;
3929 m->record.recno = recno;
3930 m->have = m->given = M_MAILBOX;
3931 m->refcount = 1;
3932 }
3933
message_new_from_mailbox(struct mailbox * mailbox,unsigned int recno)3934 EXPORTED message_t *message_new_from_mailbox(struct mailbox *mailbox, unsigned int recno)
3935 {
3936 message_t *m = message_new();
3937 m->mailbox = mailbox;
3938 m->record.recno = recno;
3939 m->have = m->given = M_MAILBOX;
3940 return m;
3941 }
3942
message_set_from_record(struct mailbox * mailbox,const struct index_record * record,message_t * m)3943 EXPORTED void message_set_from_record(struct mailbox *mailbox,
3944 const struct index_record *record,
3945 message_t *m)
3946 {
3947 assert(m->refcount == 1);
3948 message_yield(m, M_ALL);
3949 memset(m, 0, sizeof(message_t));
3950 assert(record->uid > 0);
3951 m->mailbox = mailbox;
3952 m->record = *record;
3953 m->have = m->given = M_MAILBOX|M_RECORD|M_UID;
3954 m->refcount = 1;
3955 }
3956
message_new_from_record(struct mailbox * mailbox,const struct index_record * record)3957 EXPORTED message_t *message_new_from_record(struct mailbox *mailbox,
3958 const struct index_record *record)
3959 {
3960 message_t *m = message_new();
3961 assert(record->uid > 0);
3962 m->mailbox = mailbox;
3963 m->record = *record;
3964 m->have = m->given = M_MAILBOX|M_RECORD|M_UID;
3965 return m;
3966 }
3967
message_set_from_index(struct mailbox * mailbox,const struct index_record * record,uint32_t msgno,uint32_t indexflags,message_t * m)3968 EXPORTED void message_set_from_index(struct mailbox *mailbox,
3969 const struct index_record *record,
3970 uint32_t msgno,
3971 uint32_t indexflags,
3972 message_t *m)
3973 {
3974 assert(m->refcount == 1);
3975 message_yield(m, M_ALL);
3976 memset(m, 0, sizeof(message_t));
3977 assert(record->uid > 0);
3978 m->mailbox = mailbox;
3979 m->record = *record;
3980 m->msgno = msgno;
3981 m->indexflags = indexflags;
3982 m->have = m->given = M_MAILBOX|M_RECORD|M_UID|M_INDEX;
3983 m->refcount = 1;
3984 }
3985
message_new_from_index(struct mailbox * mailbox,const struct index_record * record,uint32_t msgno,uint32_t indexflags)3986 EXPORTED message_t *message_new_from_index(struct mailbox *mailbox,
3987 const struct index_record *record,
3988 uint32_t msgno,
3989 uint32_t indexflags)
3990 {
3991 message_t *m = message_new();
3992 assert(record->uid > 0);
3993 m->mailbox = mailbox;
3994 m->record = *record;
3995 m->msgno = msgno;
3996 m->indexflags = indexflags;
3997 m->have = m->given = M_MAILBOX|M_RECORD|M_UID|M_INDEX;
3998 return m;
3999 }
4000
message_new_from_filename(const char * filename)4001 EXPORTED message_t *message_new_from_filename(const char *filename)
4002 {
4003 message_t *m = message_new();
4004 m->filename = xstrdup(filename);
4005 m->have = m->given = M_FILENAME;
4006 return m;
4007 }
4008
message_ref(message_t * m)4009 EXPORTED message_t *message_ref(message_t *m)
4010 {
4011 m->refcount++;
4012 assert(m->refcount >= 1);
4013 return m;
4014 }
4015
message_unref(message_t ** mp)4016 EXPORTED void message_unref(message_t **mp)
4017 {
4018 message_t *m;
4019
4020 if (!mp || !(m = *mp)) return;
4021 assert(m->refcount >= 1);
4022 if (--m->refcount == 0)
4023 message_free(m);
4024 *mp = NULL;
4025 }
4026
4027 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4028
4029 /*
4030 * Open or create resources which we need but do not yet have.
4031 */
message_need(const message_t * cm,unsigned int need)4032 static int message_need(const message_t *cm, unsigned int need)
4033 {
4034 #define is_missing(flags) ((need & ~(m->have)) & (flags))
4035 #define found(flags) (m->have |= (flags))
4036 int r = 0;
4037 message_t *m = (message_t *)cm;
4038
4039 if (!is_missing(M_ALL))
4040 return 0; /* easy, we already have it */
4041
4042 if (is_missing(M_MAILBOX)) {
4043 /* We can't get this for ourselves,
4044 * it needs to be passed in by the caller */
4045 return IMAP_NOTFOUND;
4046 }
4047
4048 if (is_missing(M_FILENAME)) {
4049 const char *filename;
4050 r = message_need(m, M_MAILBOX|M_RECORD);
4051 if (r) return r;
4052 filename = mailbox_record_fname(m->mailbox, &m->record);
4053 if (!filename) return IMAP_NOTFOUND;
4054 m->filename = xstrdup(filename);
4055 found(M_FILENAME);
4056 }
4057
4058 if (is_missing(M_RECORD|M_UID)) {
4059 r = message_need(m, M_MAILBOX);
4060 if (r) return r;
4061 r = mailbox_reload_index_record(m->mailbox, &m->record);
4062 if (r) return r;
4063 found(M_RECORD|M_UID);
4064 }
4065
4066 if (is_missing(M_MAP)) {
4067 r = message_need(m, M_FILENAME);
4068 if (r) return r;
4069 r = message_map_file(m, m->filename);
4070 if (r) return r;
4071 found(M_MAP);
4072 }
4073
4074 if (is_missing(M_CACHE)) {
4075 r = message_need(m, M_MAILBOX|M_RECORD);
4076 if (r) return r;
4077 r = mailbox_cacherecord(m->mailbox, &m->record);
4078 if (r) return r;
4079 found(M_CACHE);
4080 }
4081
4082 if (is_missing(M_CACHEBODY)) {
4083 if (message_need(m, M_CACHE) == 0) {
4084 r = message_parse_cbodystructure(m);
4085 if (r) return r;
4086 found(M_CACHEBODY);
4087 }
4088 else
4089 return message_need(m, M_FULLBODY);
4090 }
4091
4092 if (is_missing(M_FULLBODY)) {
4093 r = message_need(m, M_MAP);
4094 if (r) return r;
4095 m->body = (struct body *)xzmalloc(sizeof(struct body));
4096 r = message_parse_mapped(m->map.s, m->map.len, m->body, NULL);
4097 if (r) return r;
4098 found(M_CACHEBODY|M_FULLBODY);
4099 }
4100
4101 /* Check that we got everything we asked for and could get */
4102 assert(!is_missing(M_ALL));
4103
4104 return 0;
4105 #undef found
4106 #undef is_missing
4107 }
4108
4109 /*
4110 * Yield open resources.
4111 */
message_yield(message_t * m,unsigned int yield)4112 static void message_yield(message_t *m, unsigned int yield)
4113 {
4114 /* Can only yield those resources we have. */
4115 yield &= m->have;
4116
4117 /* Do not yield resources we were given at initialisation
4118 * time, they cannot be rebuilt again later. */
4119 yield &= ~m->given;
4120
4121 /* nothing to free for these - they're not constructed
4122 * or have no dynamically allocated memory */
4123 yield &= ~(M_MAILBOX|M_RECORD|M_UID|M_CACHE);
4124
4125 if ((yield & M_MAP)) {
4126 buf_free(&m->map);
4127 m->have &= ~M_MAP;
4128 }
4129
4130 if ((yield & M_BODY)) {
4131 message_free_body(m->body);
4132 free(m->body);
4133 m->body = NULL;
4134 m->have &= ~M_BODY;
4135 }
4136
4137 if ((yield & M_FILENAME)) {
4138 free(m->filename);
4139 m->filename = NULL;
4140 m->have &= ~M_FILENAME;
4141 }
4142
4143 /* Check we yielded everything we could */
4144 assert((yield & m->have) == 0);
4145 }
4146
4147 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4148
4149 /*
4150 * Parse various information out of the cyrus.cache.
4151 */
4152
/*
 * Skip either a single NIL or a balanced possibly-nested list of
 * nstrings.  Useful for ignoring various constructs from the
 * BODYSTRUCTURE cache.
 *
 * On success the space that follows the skipped item has been consumed
 * as well.  Returns 0 on success and IMAP_MAILBOX_BADFORMAT when the
 * input ends early or is not followed by a space.  A lone nstring that
 * yields a non-empty word is treated as an error.
 */
static int skip_nil_or_nstring_list(struct protstream *prot)
{
    /* pessimistic default: every early exit through 'out' is an error */
    int r = IMAP_MAILBOX_BADFORMAT;
    int c;
    struct buf word = BUF_INITIALIZER;

    c = prot_getc(prot);
    if (c == EOF)
        goto out;   /* ran out of data */
    if (c == '(') {
        /* possibly-nested list of atoms */
        int treedepth = 1;
        do {
            c = prot_getc(prot);
            /* tolerate one separating space before the next token */
            if (c == ' ')
                c = prot_getc(prot);
            if (c != ')' && c != '(') {
                /* not a paren: put it back and swallow one nstring;
                 * c then holds whatever getnstring() returned
                 * (presumably the delimiter after the string) */
                prot_ungetc(c, prot);
                c = getnstring(prot, NULL, &word);
#if DEBUG
                /* NOTE(review): 'depth' is not declared in this function,
                 * so this looks like it cannot build with DEBUG set -
                 * confirm */
                if (word.len)
                    fprintf(stderr, "%sskipping string \"%s\" at %d\n",
                            indent(depth), word.s, treedepth);
#endif
            }
            if (c == '(')
                treedepth++;
            else if (c == ')')
                treedepth--;
            else if (c == ' ')
                /* leave the separator for the next iteration to eat */
                prot_ungetc(c, prot);
            else
                goto out;
        } while (treedepth);
        /* the list must be followed by a space, which we consume */
        c = prot_getc(prot);
        if (c != ' ') goto out;
        r = 0;
    }
    else {
        prot_ungetc(c, prot);
        c = getnstring(prot, NULL, &word);
        /* only an empty word followed by a space is accepted here */
        if (c == ' ' && !word.len) {
            /* 'NIL' */
#if DEBUG
            fprintf(stderr, "%sskipping NIL\n", indent(depth));
#endif
            r = 0;
            goto out;
        }
    }
    /* else, error */

out:
    buf_free(&word);
    return r;
}
4214
parse_mime_params(struct protstream * prot,struct param ** prev)4215 static int parse_mime_params(struct protstream *prot, struct param **prev)
4216 {
4217 int c;
4218 struct buf key = BUF_INITIALIZER;
4219 struct buf val = BUF_INITIALIZER;
4220 struct param *param;
4221
4222 c = prot_getc(prot);
4223 if (c != '(') {
4224 /* must be NIL */
4225 if (c != 'N') goto err;
4226 c = prot_getc(prot);
4227 if (c != 'I') goto err;
4228 c = prot_getc(prot);
4229 if (c != 'L') goto err;
4230 return prot_getc(prot);
4231 }
4232
4233 /* otherwise we have a list */
4234 do {
4235 c = getnstring(prot, NULL, &key);
4236 if (c != ' ') goto err;
4237 c = getnstring(prot, NULL, &val);
4238 if (c != ' ' && c != ')') goto err;
4239 param = (struct param *)xzmalloc(sizeof(struct param));
4240 param->attribute = buf_releasenull(&key);
4241 param->value = buf_releasenull(&val);
4242 *prev = param;
4243 prev = ¶m->next;
4244 } while (c == ' ');
4245
4246 return prot_getc(prot);
4247
4248 err:
4249 buf_free(&key);
4250 buf_free(&val);
4251 return EOF;
4252 }
4253
parse_bodystructure_part(struct protstream * prot,struct body * body,const char * part_id)4254 static int parse_bodystructure_part(struct protstream *prot, struct body *body, const char *part_id)
4255 {
4256 int c;
4257 int r = 0;
4258 struct buf buf = BUF_INITIALIZER;
4259
4260 memset(body, 0, sizeof(struct body));
4261
4262 c = prot_getc(prot);
4263 if (c != '(') {
4264 badformat:
4265 r = IMAP_MAILBOX_BADFORMAT;
4266 goto out;
4267 }
4268
4269 c = prot_getc(prot);
4270 prot_ungetc(c, prot);
4271 if (c == '(') {
4272 while (c == '(') {
4273 body->numparts++;
4274 body->subpart = (struct body *)xrealloc((char *)body->subpart,
4275 body->numparts*sizeof(struct body));
4276
4277 buf_reset(&buf);
4278 if (part_id) buf_printf(&buf, "%s.", part_id);
4279 buf_printf(&buf, "%d", body->numparts);
4280 char *part_id = buf_release(&buf);
4281 struct body *subbody = &body->subpart[body->numparts-1];
4282 r = parse_bodystructure_part(prot, subbody, part_id);
4283 subbody->part_id = part_id;
4284 if (r) goto out;
4285
4286 c = prot_getc(prot);
4287 prot_ungetc(c, prot);
4288 }
4289
4290 c = prot_getc(prot);
4291 if (c != ' ') goto badformat;
4292
4293 body->type = xstrdup("MULTIPART");
4294 }
4295 else {
4296 /* parse mime-type */
4297 c = getnstring(prot, NULL, &buf);
4298 if (c != ' ') goto badformat;
4299
4300 body->type = buf_releasenull(&buf);
4301 }
4302
4303 /* parse mime-subtype */
4304 c = getnstring(prot, NULL, &buf);
4305 if (c != ' ') goto badformat;
4306 body->subtype = buf_releasenull(&buf);
4307
4308 /* parse mime-params */
4309 c = parse_mime_params(prot, &body->params);
4310 if (c != ' ') goto badformat;
4311
4312 if (strcmp(body->type, "MULTIPART")) {
4313 /* msgid */
4314 c = getnstring(prot, NULL, &buf);
4315 if (c != ' ') goto badformat;
4316 body->message_id = buf_releasenull(&buf);
4317
4318 /* description */
4319 c = getnstring(prot, NULL, &buf);
4320 if (c != ' ') goto badformat;
4321 body->description = buf_releasenull(&buf);
4322
4323 /* encoding */
4324 c = getnstring(prot, NULL, &buf);
4325 if (c != ' ') goto badformat;
4326 body->encoding = buf_releasenull(&buf);
4327
4328 /* content-size */
4329 c = getword(prot, &buf);
4330 if (c != ' ') goto badformat;
4331 body->content_size = atoi(buf_cstring(&buf));
4332
4333 if (!strcmpsafe(body->type, "TEXT")) {
4334 /* parse content-lines */
4335 c = getword(prot, &buf);
4336 if (c != ' ') goto badformat;
4337 body->content_lines = atoi(buf_cstring(&buf));
4338 }
4339
4340 else if (!strcmpsafe(body->type, "MESSAGE") &&
4341 !strcmpsafe(body->subtype, "RFC822")) {
4342 body->numparts = 1;
4343 body->subpart = xzmalloc(sizeof(struct body));
4344
4345 /* skip envelope */
4346 r = skip_nil_or_nstring_list(prot);
4347 if (r) goto out;
4348
4349 /* process body */
4350 r = parse_bodystructure_part(prot, body->subpart, part_id);
4351 if (r) goto out;
4352
4353 /* skip trailing space (parse_bs_part doesn't eat it) */
4354 c = prot_getc(prot);
4355 if (c != ' ') goto badformat;
4356
4357 /* parse content-lines */
4358 c = getword(prot, &buf);
4359 if (c != ' ') goto badformat;
4360 body->content_lines = atoi(buf_cstring(&buf));
4361 }
4362
4363 /* parse md5sum */
4364 c = getnstring(prot, NULL, &buf);
4365 if (c != ' ') goto badformat;
4366 body->md5 = buf_releasenull(&buf);
4367 }
4368
4369 /* skips disposition-and-params */
4370 r = skip_nil_or_nstring_list(prot);
4371 if (r) goto out;
4372
4373 /* parse languages */ /* TODO */
4374 r = skip_nil_or_nstring_list(prot);
4375 if (r) goto out;
4376
4377 /* location */
4378 c = getnstring(prot, NULL, &buf);
4379 if (c != ')') goto badformat; /* final field */
4380 body->location = buf_releasenull(&buf);
4381
4382 r = 0;
4383 out:
4384 buf_free(&buf);
4385 return r;
4386 }
4387
parse_bodystructure_sections(const char ** cachestrp,const char * cacheend,struct body * body,uint32_t cache_version,const char * part_id)4388 static int parse_bodystructure_sections(const char **cachestrp, const char *cacheend,
4389 struct body *body, uint32_t cache_version,
4390 const char *part_id)
4391 {
4392 struct body *this;
4393 int nsubparts;
4394 int part;
4395 uint32_t cte;
4396 struct buf buf = BUF_INITIALIZER;
4397 int r = 0;
4398
4399 if (*cachestrp + 4 > cacheend) {
4400 r = IMAP_MAILBOX_BADFORMAT;
4401 goto done;
4402 }
4403
4404 nsubparts = CACHE_ITEM_BIT32(*cachestrp);
4405 *cachestrp += 4;
4406
4407 /* XXX - this size needs increasing for charset sizes and sha1s depending on version,
4408 * it won't crash, but it may overrun while reading */
4409 if (*cachestrp + 4*5*nsubparts > cacheend) {
4410 r = IMAP_MAILBOX_BADFORMAT;
4411 goto done;
4412 }
4413
4414 if (strcmp(body->type, "MESSAGE") == 0
4415 && strcmp(body->subtype, "RFC822") == 0) {
4416
4417 if (strcmp(body->subpart->type, "MULTIPART") == 0) {
4418
4419 /*
4420 * Part 0 of a message/rfc822 is the message header/text.
4421 * Nested parts of a message/rfc822 containing a multipart
4422 * are the sub-parts of the multipart.
4423 */
4424 if (body->subpart->numparts + 1 != nsubparts) {
4425 r = IMAP_MAILBOX_BADFORMAT;
4426 goto done;
4427 }
4428
4429 body->subpart->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4430 body->subpart->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4431 body->subpart->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4432 body->subpart->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4433 // skip cte
4434 *cachestrp += 5*4;
4435
4436 if (cache_version >= 5)
4437 *cachestrp = message_guid_import(&body->subpart->content_guid, *cachestrp);
4438
4439 if (cache_version >= 8) {
4440 body->subpart->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4441 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4442 }
4443
4444 if (cache_version >= 9) {
4445 body->subpart->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4446 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4447 }
4448
4449 for (part = 0; part < body->subpart->numparts; part++) {
4450 this = &body->subpart->subpart[part];
4451 this->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4452 this->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4453 this->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4454 this->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4455 cte = CACHE_ITEM_BIT32(*cachestrp+4*4);
4456 *cachestrp += 5*4;
4457
4458 /* XXX CACHE_MINOR_VERSION 4 replaces numeric charset
4459 * identifiers with variable-length strings. Remove
4460 * this conditional once cache versions <= 3 are
4461 * deprecated */
4462 if (cache_version >= 4)
4463 *cachestrp += (cte >> 16) & 0xffff;
4464
4465 /* CACHE_MINOR_VERSION 5 adds a sha1 after the charset */
4466 if (cache_version >= 5)
4467 *cachestrp = message_guid_import(&this->content_guid, *cachestrp);
4468
4469 /* CACHE_MINOR_VERSION 8 adds the decoded content size after sha1 */
4470 if (cache_version >= 8) {
4471 this->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4472 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4473 }
4474
4475 /* CACHE_MINOR_VERSION 9 adds the number of content lines after the decoded size */
4476 if (cache_version >= 9) {
4477 this->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4478 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4479 }
4480 }
4481
4482 /* and parse subparts */
4483 for (part = 0; part < body->subpart->numparts; part++) {
4484 this = &body->subpart->subpart[part];
4485 buf_reset(&buf);
4486 if (part_id) buf_printf(&buf, "%s.", part_id);
4487 buf_printf(&buf, "%d", part + 1);
4488 if (parse_bodystructure_sections(cachestrp, cacheend, this, cache_version, buf_cstring(&buf))) {
4489 r = IMAP_MAILBOX_BADFORMAT;
4490 goto done;
4491 }
4492 }
4493 }
4494 else {
4495 /*
4496 * Part 0 of a message/rfc822 is the message header/text.
4497 * Part 1 of a message/rfc822 containing a non-multipart
4498 * is the message body.
4499 */
4500
4501 if (2 != nsubparts) {
4502 r = IMAP_MAILBOX_BADFORMAT;
4503 goto done;
4504 }
4505
4506 /* data is the same in body, just grab the first one */
4507 body->subpart->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4508 body->subpart->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4509 body->subpart->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4510 body->subpart->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4511 // skip cte
4512 *cachestrp += 5*4;
4513 if (cache_version >= 5)
4514 *cachestrp += MESSAGE_GUID_SIZE;
4515 if (cache_version >= 8)
4516 *cachestrp += 1*4;
4517 if (cache_version >= 9)
4518 *cachestrp += 1*4;
4519 *cachestrp += 4*4;
4520
4521 if (strcmp(body->subpart->type, "MULTIPART") == 0) {
4522 /* Treat 0-part multipart as 0-length text */
4523 *cachestrp += 1*4;
4524 }
4525 else {
4526 /* Skip charset/encoding identifiers. */
4527 cte = CACHE_ITEM_BIT32(*cachestrp);
4528 *cachestrp += 1*4;
4529 /* XXX CACHE_MINOR_VERSION 4 replaces numeric charset
4530 * identifiers with variable-length strings. Remove
4531 * this conditional once cache versions <= 3 are
4532 * deprecated */
4533 if (cache_version >= 4)
4534 *cachestrp += (cte >> 16) & 0xffff;
4535
4536 if (!body->subpart->part_id) {
4537 buf_reset(&buf);
4538 if (part_id) buf_printf(&buf, "%s.", part_id);
4539 buf_printf(&buf, "%d", 1);
4540 body->subpart->part_id = buf_release(&buf);
4541 }
4542 }
4543 /* CACHE_MINOR_VERSION 5 adds a sha1 after the charset */
4544 if (cache_version >= 5)
4545 *cachestrp = message_guid_import(&body->subpart->content_guid, *cachestrp);
4546
4547 if (cache_version >= 8) {
4548 body->subpart->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4549 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4550 }
4551
4552 if (cache_version >= 9) {
4553 body->subpart->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4554 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4555 }
4556
4557 /* and parse subpart */
4558 if (parse_bodystructure_sections(cachestrp, cacheend, body->subpart, cache_version, body->part_id)) {
4559 r = IMAP_MAILBOX_BADFORMAT;
4560 goto done;
4561 }
4562 }
4563 }
4564 else if (body->numparts) {
4565 /*
4566 * Cannot fetch part 0 of a multipart.
4567 * Nested parts of a multipart are the sub-parts.
4568 */
4569 if (body->numparts + 1 != nsubparts) {
4570 r = IMAP_MAILBOX_BADFORMAT;
4571 goto done;
4572 }
4573 *cachestrp += 5*4;
4574 if (cache_version >= 5)
4575 *cachestrp += MESSAGE_GUID_SIZE;
4576 if (cache_version >= 8)
4577 *cachestrp += 4;
4578 if (cache_version >= 9)
4579 *cachestrp += 4;
4580 for (part = 0; part < body->numparts; part++) {
4581 this = &body->subpart[part];
4582 this->header_offset = CACHE_ITEM_BIT32(*cachestrp+0*4);
4583 this->header_size = CACHE_ITEM_BIT32(*cachestrp+1*4);
4584 this->content_offset = CACHE_ITEM_BIT32(*cachestrp+2*4);
4585 this->content_size = CACHE_ITEM_BIT32(*cachestrp+3*4);
4586 cte = CACHE_ITEM_BIT32(*cachestrp+4*4);
4587 *cachestrp += 5*4;
4588
4589 if (cache_version >= 4)
4590 *cachestrp += (cte >> 16) & 0xffff;
4591
4592 if (cache_version >= 5)
4593 *cachestrp = message_guid_import(&this->content_guid, *cachestrp);
4594
4595 if (cache_version >= 8) {
4596 this->decoded_content_size = CACHE_ITEM_BIT32(*cachestrp);
4597 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4598 }
4599
4600 if (cache_version >= 9) {
4601 this->content_lines = CACHE_ITEM_BIT32(*cachestrp);
4602 *cachestrp += CACHE_ITEM_SIZE_SKIP;
4603 }
4604 }
4605
4606 for (part = 0; part < body->numparts; part++) {
4607 this = &body->subpart[part];
4608 buf_reset(&buf);
4609 if (part_id) buf_printf(&buf, "%s.", part_id);
4610 buf_printf(&buf, "%d", part + 1);
4611 if (parse_bodystructure_sections(cachestrp, cacheend, this, cache_version, buf_cstring(&buf))) {
4612 r = IMAP_MAILBOX_BADFORMAT;
4613 goto done;
4614 }
4615 }
4616 }
4617 else {
4618 /*
4619 * Leaf section--no part 0 or nested parts
4620 */
4621 if (nsubparts != 0) {
4622 r = IMAP_MAILBOX_BADFORMAT;
4623 goto done;
4624 }
4625 if (!body->part_id)
4626 body->part_id = xstrdupnull(part_id);
4627 }
4628
4629 done:
4630 buf_free(&buf);
4631 return r;
4632 }
4633
/*
 * Build m->body from the cache record of 'm': the textual
 * CACHE_BODYSTRUCTURE item gives the MIME tree, the binary
 * CACHE_SECTION item then supplies offsets and sizes.
 *
 * The cache must already be loaded (M_CACHE).  Returns 0 on success.
 * On failure the error is logged, the partially built body is freed
 * and m->body is left NULL.
 */
static int message_parse_cbodystructure(message_t *m)
{
    struct protstream *prot = NULL;
    const char *cachestr;
    const char *cacheend;
    struct body toplevel;
    int r;

    /* We're reading the cache - double check we have it
     * before touching any cache item */
    assert(m->have & M_CACHE);

    cachestr = cacheitem_base(&m->record, CACHE_SECTION);
    cacheend = cachestr + cacheitem_size(&m->record, CACHE_SECTION);

    prot = prot_readmap(cacheitem_base(&m->record, CACHE_BODYSTRUCTURE),
                        cacheitem_size(&m->record, CACHE_BODYSTRUCTURE));
    if (!prot)
        return IMAP_MAILBOX_BADFORMAT;
    prot_setisclient(prot, 1);  /* don't crash parsing literals */

    m->body = xzmalloc(sizeof(struct body));
    r = parse_bodystructure_part(prot, m->body, NULL);
    if (r) {
        syslog(LOG_ERR, "IOERROR: parsing body structure for %s %u (%.*s)",
               m->mailbox->name, m->record.uid,
               (int)cacheitem_size(&m->record, CACHE_BODYSTRUCTURE),
               cacheitem_base(&m->record, CACHE_BODYSTRUCTURE));
        goto done;
    }

    /* The section data describes the message as if it were wrapped in
     * a message/rfc822 part, so fake that wrapper on the stack */
    memset(&toplevel, 0, sizeof(struct body));
    toplevel.type = "MESSAGE";
    toplevel.subtype = "RFC822";
    toplevel.subpart = m->body;

    r = parse_bodystructure_sections(&cachestr, cacheend, &toplevel, m->record.cache_version, NULL);
    if (r) syslog(LOG_ERR, "IOERROR: parsing section structure for %s %u (%.*s)",
                  m->mailbox->name, m->record.uid,
                  (int)cacheitem_size(&m->record, CACHE_BODYSTRUCTURE),
                  cacheitem_base(&m->record, CACHE_BODYSTRUCTURE));

done:
    if (r) {
        /* Don't leave a half-built body behind: no 'have' bit gets set
         * for it on failure, so it would otherwise be overwritten (and
         * leaked) by a later attempt to build the body */
        message_free_body(m->body);
        free(m->body);
        m->body = NULL;
    }
    prot_free(prot);

    return r;
}
4675
4676 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4677
/*
 * Map the file 'fname' read-only into m->map, replacing any previous
 * contents of the map.
 *
 * Returns 0 on success, the errno from open() if the file cannot be
 * opened, or EINVAL if it is not a regular file.  A failing fstat() is
 * logged and reported through fatal().
 */
static int message_map_file(message_t *m, const char *fname)
{
    struct stat sbuf;
    int result = EINVAL;
    int fd = open(fname, O_RDONLY, 0666);

    if (fd == -1) return errno;

    if (fstat(fd, &sbuf) == -1) {
        syslog(LOG_ERR, "IOERROR: fstat on %s: %m", fname);
        fatal("can't fstat message file", EX_OSFILE);
    }

    /* only regular files are acceptable as message files */
    if (S_ISREG(sbuf.st_mode)) {
        buf_free(&m->map);
        buf_refresh_mmap(&m->map, /*onceonly*/1, fd, fname, sbuf.st_size,
                         m->mailbox ? m->mailbox->name : NULL);
        result = 0;
    }

    /* the descriptor is not needed once we are done with it here */
    close(fd);

    return result;
}
4701
4702 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4703
/*
 * Append the type and subtype of 'body' to 'types' unless its type is
 * MULTIPART or MESSAGE, then do the same for each of its sub-parts in
 * order.
 */
static void body_get_leaf_types(struct body *body, strarray_t *types)
{
    const int is_container = !strcmpsafe(body->type, "MULTIPART") ||
                             !strcmpsafe(body->type, "MESSAGE");
    int n = 0;

    if (!is_container) {
        strarray_append(types, body->type);
        strarray_append(types, body->subtype);
    }

    while (n < body->numparts) {
        body_get_leaf_types(&body->subpart[n], types);
        n++;
    }
}
4718
/*
 * Call 'proc' for the sections of 'body' and, recursively, of all its
 * sub-parts, reading the data from the mapped file of 'message'.
 *
 * For each part 'proc' is first called with isbody 0 for the header
 * (only when the part has a non-zero header size) and then with
 * isbody 1 for the content.  TEXT parts pass the charset and encoding
 * found by message_parse_charset(); every other part passes an unknown
 * charset and the encoding looked up from body->encoding.
 *
 * Stops at, and returns, the first non-zero result of 'proc'.
 */
static int body_foreach_section(struct body *body, struct message *message,
                         int (*proc)(int isbody, charset_t charset,
                               int encoding,
                               const char *type, const char *subtype,
                               const struct param *type_params,
                               const char *disposition,
                               const struct param *disposition_params,
                               const struct message_guid *content_guid,
                               const char *part,
                               struct buf *data, void *rock),
                         void *rock)
{
    struct buf data = BUF_INITIALIZER;
    int r;
    int n;

    if (body->header_size) {
        struct body *scratch = NULL;
        const char *disposition = body->disposition;
        struct param *disposition_params = body->disposition_params;

        if (!disposition) {
            /* XXX hack: body can either be read from the binary cache body
             * or bodystructure, but either misses the contents of the other.
             * Re-parse the headers into a throwaway body to learn the
             * disposition. */
            strarray_t boundaries = STRARRAY_INITIALIZER;
            struct msg msg;

            scratch = xzmalloc(sizeof(struct body));

            msg.base = message->map.s + body->header_offset;
            msg.len = body->header_size;
            msg.offset = 0;
            msg.encode = 0;
            message_parse_headers(&msg, scratch, "text/plain", &boundaries, NULL);

            disposition = scratch->disposition;
            disposition_params = scratch->disposition_params;
        }

        buf_init_ro(&data, message->map.s + body->header_offset, body->header_size);
        r = proc(/*isbody*/0, CHARSET_UNKNOWN_CHARSET, 0, body->type, body->subtype,
                 body->params, disposition, disposition_params, &body->content_guid,
                 body->part_id, &data, rock);
        buf_free(&data);

        /* the disposition strings live in scratch, so free it only now */
        if (scratch) {
            message_free_body(scratch);
            free(scratch);
        }

        if (r) return r;
    }

    if (strcmpsafe(body->type, "TEXT")) {
        /* anything but text: no charset, encoding by name */
        buf_init_ro(&data, message->map.s + body->content_offset, body->content_size);
        r = proc(/*isbody*/1, CHARSET_UNKNOWN_CHARSET, encoding_lookupname(body->encoding),
                 body->type, body->subtype, body->params, NULL, NULL,
                 &body->content_guid, body->part_id, &data, rock);
        buf_free(&data);
    }
    else {
        int encoding;
        charset_t charset = CHARSET_UNKNOWN_CHARSET;

        message_parse_charset(body, &encoding, &charset);
        buf_init_ro(&data, message->map.s + body->content_offset, body->content_size);
        r = proc(/*isbody*/1, charset, encoding, body->type, body->subtype,
                 body->params, NULL, NULL, &body->content_guid, body->part_id,
                 &data, rock);
        buf_free(&data);
        charset_free(&charset);
    }
    if (r) return r;

    /* r is zero here; keep going until a sub-part reports otherwise */
    for (n = 0; !r && n < body->numparts; n++) {
        r = body_foreach_section(&body->subpart[n], message, proc, rock);
    }

    return r;
}
4797
4798
4799 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4800
4801 /*
4802 * Iterate 'proc' over all the MIME header sections and body sections of
4803 * type TEXT, in the message 'm', preorder. The 'proc' is called with
4804 * 'partno' equal to zero for header sections, non-zero for body
4805 * sections. If 'proc' returns non-zero, the iteration finishes early
4806 * and the return value of 'proc' is returned. Otherwise returns 0.
4807 */
message_foreach_section(message_t * m,int (* proc)(int isbody,charset_t charset,int encoding,const char * type,const char * subtype,const struct param * type_params,const char * disposition,const struct param * disposition_params,const struct message_guid * content_guid,const char * part,struct buf * data,void * rock),void * rock)4808 EXPORTED int message_foreach_section(message_t *m,
4809 int (*proc)(int isbody, charset_t charset, int encoding,
4810 const char *type, const char *subtype,
4811 const struct param *type_params,
4812 const char *disposition,
4813 const struct param *disposition_params,
4814 const struct message_guid *content_guid,
4815 const char *part,
4816 struct buf *data,
4817 void *rock),
4818 void *rock)
4819 {
4820 int r = message_need(m, M_CACHEBODY|M_MAP);
4821 if (r) return r;
4822 return body_foreach_section(m->body, m, proc, rock);
4823 }
4824
4825 /*
4826 * Get the MIME content types of all leaf sections, i.e. sections whose
4827 * type is not multipart or message. Strings are added to the array in
4828 * pairs, type first then subtype.
4829 */
message_get_leaf_types(message_t * m,strarray_t * types)4830 EXPORTED int message_get_leaf_types(message_t *m, strarray_t *types)
4831 {
4832 int r = message_need(m, M_CACHEBODY);
4833 if (r) return r;
4834 body_get_leaf_types(m->body, types);
4835 return 0;
4836 }
4837
4838 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
4839
message_get_bcc(message_t * m,struct buf * buf)4840 EXPORTED int message_get_bcc(message_t *m, struct buf *buf)
4841 {
4842 return message_get_field(m, "bcc", MESSAGE_RAW, buf);
4843 }
4844
message_get_deliveredto(message_t * m,struct buf * buf)4845 EXPORTED int message_get_deliveredto(message_t *m, struct buf *buf)
4846 {
4847 int r = message_get_field(m, "X-Original-Delivered-To", MESSAGE_RAW, buf);
4848 if (!r && buf_len(buf) == 0) {
4849 r = message_get_field(m, "X-Delivered-To", MESSAGE_RAW, buf);
4850 }
4851 return r;
4852 }
4853
message_get_cc(message_t * m,struct buf * buf)4854 EXPORTED int message_get_cc(message_t *m, struct buf *buf)
4855 {
4856 return message_get_field(m, "cc", MESSAGE_RAW, buf);
4857 }
4858
message_get_to(message_t * m,struct buf * buf)4859 EXPORTED int message_get_to(message_t *m, struct buf *buf)
4860 {
4861 return message_get_field(m, "to", MESSAGE_RAW, buf);
4862 }
4863
message_get_from(message_t * m,struct buf * buf)4864 EXPORTED int message_get_from(message_t *m, struct buf *buf)
4865 {
4866 return message_get_field(m, "from", MESSAGE_RAW, buf);
4867 }
4868
message_get_listid(message_t * m,struct buf * buf)4869 EXPORTED int message_get_listid(message_t *m, struct buf *buf)
4870 {
4871 return message_get_field(m, "list-id", MESSAGE_RAW, buf);
4872 }
4873
message_get_messageid(message_t * m,struct buf * buf)4874 EXPORTED int message_get_messageid(message_t *m, struct buf *buf)
4875 {
4876 return message_get_field(m, "message-id", MESSAGE_RAW, buf);
4877 }
4878
message_get_subject(message_t * m,struct buf * buf)4879 EXPORTED int message_get_subject(message_t *m, struct buf *buf)
4880 {
4881 return message_get_field(m, "subject", MESSAGE_RAW, buf);
4882 }
4883
message_get_mailinglist(message_t * m,struct buf * buf)4884 EXPORTED int message_get_mailinglist(message_t *m, struct buf *buf)
4885 {
4886 return message_get_field(m, "mailing-list", MESSAGE_RAW, buf);
4887 }
4888
message_get_priority(message_t * m,struct buf * buf)4889 EXPORTED int message_get_priority(message_t *m, struct buf *buf)
4890 {
4891 /* Only returns priority value "1" or none. */
4892 int r = message_get_field(m, "X-Priority", MESSAGE_RAW, buf);
4893 buf_trim(buf);
4894 if (!r && !strcmp(buf_cstring(buf), "1")) {
4895 return 0;
4896 }
4897 r = message_get_field(m, "Importance", MESSAGE_RAW, buf);
4898 buf_trim(buf);
4899 if (!r && !strcmp(buf_cstring(buf), "high")) {
4900 buf_setcstr(buf, "1");
4901 return 0;
4902 }
4903 buf_reset(buf);
4904 return r;
4905 }
4906
msg_record(const message_t * m)4907 EXPORTED const struct index_record *msg_record(const message_t *m)
4908 {
4909 assert(!message_need(m, M_RECORD))
4910 return &m->record;
4911 }
4912
msg_mailbox(const message_t * m)4913 EXPORTED struct mailbox *msg_mailbox(const message_t *m)
4914 {
4915 assert(!message_need(m, M_MAILBOX))
4916 return m->mailbox;
4917 }
4918
message_get_size(message_t * m,uint32_t * sizep)4919 EXPORTED int message_get_size(message_t *m, uint32_t *sizep)
4920 {
4921 int r = message_need(m, M_RECORD);
4922 if (!r) {
4923 *sizep = m->record.size;
4924 return 0;
4925 }
4926 r = message_need(m, M_MAP);
4927 if (!r) {
4928 *sizep = buf_len(&m->map);
4929 }
4930 return r;
4931 }
4932
msg_size(const message_t * m)4933 EXPORTED uint32_t msg_size(const message_t *m)
4934 {
4935 assert(!message_need(m, M_RECORD))
4936 return m->record.size;
4937 }
4938
message_get_uid(message_t * m,uint32_t * uidp)4939 EXPORTED int message_get_uid(message_t *m, uint32_t *uidp)
4940 {
4941 int r = message_need(m, M_RECORD);
4942 if (r) return r;
4943 *uidp = m->record.uid;
4944 return 0;
4945 }
4946
msg_uid(const message_t * m)4947 EXPORTED uint32_t msg_uid(const message_t *m)
4948 {
4949 assert(!message_need(m, M_RECORD))
4950 return m->record.uid;
4951 }
4952
message_get_cid(message_t * m,conversation_id_t * cidp)4953 EXPORTED int message_get_cid(message_t *m, conversation_id_t *cidp)
4954 {
4955 int r = message_need(m, M_RECORD);
4956 if (r) return r;
4957 *cidp = m->record.cid;
4958 return 0;
4959 }
4960
msg_cid(const message_t * m)4961 EXPORTED conversation_id_t msg_cid(const message_t *m)
4962 {
4963 assert(!message_need(m, M_RECORD))
4964 return m->record.cid;
4965 }
4966
message_get_modseq(message_t * m,modseq_t * modseqp)4967 EXPORTED int message_get_modseq(message_t *m, modseq_t *modseqp)
4968 {
4969 int r = message_need(m, M_RECORD);
4970 if (r) return r;
4971 *modseqp = m->record.modseq;
4972 return 0;
4973 }
4974
msg_modseq(const message_t * m)4975 EXPORTED modseq_t msg_modseq(const message_t *m)
4976 {
4977 assert(!message_need(m, M_RECORD))
4978 return m->record.modseq;
4979 }
4980
message_get_msgno(message_t * m,uint32_t * msgnop)4981 EXPORTED int message_get_msgno(message_t *m, uint32_t *msgnop)
4982 {
4983 int r = message_need(m, M_INDEX);
4984 if (r) return r;
4985 *msgnop = m->msgno;
4986 return 0;
4987 }
4988
msg_msgno(const message_t * m)4989 EXPORTED uint32_t msg_msgno(const message_t *m)
4990 {
4991 assert(!message_need(m, M_INDEX))
4992 return m->msgno;
4993 }
4994
message_get_guid(message_t * m,const struct message_guid ** guidp)4995 EXPORTED int message_get_guid(message_t *m, const struct message_guid **guidp)
4996 {
4997 int r = message_need(m, M_RECORD);
4998 if (!r) {
4999 *guidp = &m->record.guid;
5000 return 0;
5001 }
5002 if (message_guid_isnull(&m->guid)) {
5003 r = message_need(m, M_MAP);
5004 if (r) return r;
5005 message_guid_generate(&m->guid, buf_base(&m->map), buf_len(&m->map));
5006 }
5007 *guidp = &m->guid;
5008 return 0;
5009 }
5010
msg_guid(const message_t * m)5011 EXPORTED const struct message_guid *msg_guid(const message_t *m)
5012 {
5013 assert(!message_need(m, M_RECORD))
5014 return &m->record.guid;
5015 }
5016
message_get_userflags(message_t * m,uint32_t * flagsp)5017 EXPORTED int message_get_userflags(message_t *m, uint32_t *flagsp)
5018 {
5019 int r = message_need(m, M_RECORD);
5020 int i;
5021 if (r) return r;
5022 for (i = 0; i < MAX_USER_FLAGS/32; i++)
5023 flagsp[i] = m->record.user_flags[i];
5024 return 0;
5025 }
5026
message_get_systemflags(message_t * m,uint32_t * flagsp)5027 EXPORTED int message_get_systemflags(message_t *m, uint32_t *flagsp)
5028 {
5029 int r = message_need(m, M_RECORD);
5030 if (r) return r;
5031 *flagsp = m->record.system_flags;
5032 return 0;
5033 }
5034
message_get_internalflags(message_t * m,uint32_t * flagsp)5035 EXPORTED int message_get_internalflags(message_t *m, uint32_t *flagsp)
5036 {
5037 int r = message_need(m, M_RECORD);
5038 if (r) return r;
5039 *flagsp = m->record.internal_flags;
5040 return 0;
5041 }
5042
message_get_indexflags(message_t * m,uint32_t * flagsp)5043 EXPORTED int message_get_indexflags(message_t *m, uint32_t *flagsp)
5044 {
5045 int r = message_need(m, M_INDEX);
5046 if (r) return r;
5047 *flagsp = m->indexflags;
5048 return 0;
5049 }
5050
message_get_savedate(message_t * m,time_t * datep)5051 EXPORTED int message_get_savedate(message_t *m, time_t *datep)
5052 {
5053 int r = message_need(m, M_RECORD);
5054 if (r) return r;
5055 *datep = m->record.savedate;
5056 if (!*datep) *datep = m->record.internaldate;
5057 return 0;
5058 }
5059
message_get_indexversion(message_t * m,uint32_t * versionp)5060 EXPORTED int message_get_indexversion(message_t *m, uint32_t *versionp)
5061 {
5062 int r = message_need(m, M_MAILBOX);
5063 if (r) return r;
5064 *versionp = m->mailbox->i.minor_version;
5065 return 0;
5066 }
5067
message_get_sentdate(message_t * m,time_t * datep)5068 EXPORTED int message_get_sentdate(message_t *m, time_t *datep)
5069 {
5070 int r = message_need(m, M_RECORD);
5071 if (r) return r;
5072 *datep = m->record.sentdate;
5073 return 0;
5074 }
5075
message_get_gmtime(message_t * m,time_t * tp)5076 EXPORTED int message_get_gmtime(message_t *m, time_t *tp)
5077 {
5078 int r = message_need(m, M_RECORD);
5079 if (r) return r;
5080 *tp = m->record.gmtime;
5081 return 0;
5082 }
5083
message_get_internaldate(message_t * m,time_t * datep)5084 EXPORTED int message_get_internaldate(message_t *m, time_t *datep)
5085 {
5086 int r = message_need(m, M_RECORD);
5087 if (r) return r;
5088 *datep = m->record.internaldate;
5089 return 0;
5090 }
5091
message_get_fname(message_t * m,const char ** fnamep)5092 EXPORTED int message_get_fname(message_t *m, const char **fnamep)
5093 {
5094 int r = message_need(m, M_FILENAME);
5095 if (r) return r;
5096 *fnamep = m->filename;
5097 return 0;
5098 }
5099
5100 /* XXX despite the name, this actually gives back ALL the values of the
5101 * XXX named header, unless flags contains MESSAGE_LAST
5102 */
extract_one(struct buf * buf,const char * name,int flags,int has_name,int isutf8,struct buf * raw)5103 static void extract_one(struct buf *buf,
5104 const char *name,
5105 int flags,
5106 int has_name,
5107 int isutf8,
5108 struct buf *raw)
5109 {
5110 char *p = NULL;
5111
5112 if (raw->len && (flags & MESSAGE_LAST)) {
5113 /* Skip all but the last header value */
5114 const char *q = raw->s;
5115 const char *last = raw->s;
5116 while ((p = strnchr(q, '\r', raw->s + raw->len - q))) {
5117 if (p >= raw->s + raw->len - 2)
5118 break;
5119 if (*(p+1) == '\n' && *(p+2) && !isspace(*(p+2)))
5120 last = p + 2;
5121 q = p + 1;
5122 }
5123 if (last != raw->s)
5124 buf_remove(raw, 0, last - raw->s);
5125 p = NULL;
5126 }
5127
5128 if (has_name && !(flags & MESSAGE_FIELDNAME)) {
5129 /* remove the fieldname and colon */
5130 int pos = buf_findchar(raw, 0, ':');
5131 assert(pos > 0);
5132 buf_remove(raw, 0, pos+1);
5133 }
5134 else if (!has_name && (flags & MESSAGE_FIELDNAME)) {
5135 /* insert a fieldname and colon */
5136 buf_insertcstr(raw, 0, ":");
5137 buf_insertcstr(raw, 0, name);
5138 }
5139
5140 switch (flags & _MESSAGE_FORMAT_MASK) {
5141 case MESSAGE_RAW:
5142 /* Logically, we're appending to the resulting buffer.
5143 * However if the buf is empty we can save a memory copy
5144 * by setting it up as a CoW buffer. This means that
5145 * the caller will need to call buf_cstring() if they
5146 * need a C string. */
5147 if (!raw->alloc)
5148 buf_cowappendmap(buf, raw->s, raw->len);
5149 else
5150 buf_append(buf, raw);
5151 break;
5152 case MESSAGE_DECODED:
5153 /* XXX - this is also broken with utf8ness, but the only caller protects agains the fields
5154 * that could be utf8 (search_header) - so it doesn't matter */
5155 p = charset_parse_mimeheader(buf_cstring(raw), charset_flags);
5156 buf_appendcstr(buf, p);
5157 break;
5158 case MESSAGE_SNIPPET:
5159 if (isutf8) {
5160 charset_t utf8 = charset_lookupname("utf-8");
5161 p = charset_convert(buf_cstring(raw), utf8, charset_snippet_flags);
5162 charset_free(&utf8);
5163 }
5164 else {
5165 p = charset_decode_mimeheader(buf_cstring(raw), charset_snippet_flags);
5166 }
5167 buf_appendcstr(buf, p);
5168 break;
5169 case MESSAGE_SEARCH:
5170 /* TODO: need a variant of decode_mimeheader() which
5171 * takes two struct buf* and a search flag */
5172 if (isutf8) {
5173 charset_t utf8 = charset_lookupname("utf-8");
5174 p = charset_convert(buf_cstring(raw), utf8, charset_flags);
5175 charset_free(&utf8);
5176 }
5177 else {
5178 p = charset_decode_mimeheader(buf_cstring(raw), charset_flags);
5179 }
5180 buf_appendcstr(buf, p);
5181 break;
5182 }
5183
5184 if (flags & MESSAGE_TRIM)
5185 buf_trim(buf);
5186
5187 free(p);
5188 }
5189
message_get_spamscore(message_t * m,uint32_t * valp)5190 EXPORTED int message_get_spamscore(message_t *m, uint32_t *valp)
5191 {
5192 struct buf buf = BUF_INITIALIZER;
5193 int r = message_get_field(m, "X-Spam-score", MESSAGE_RAW, &buf);
5194 *valp = r ? 0 : (int)((atof(buf_cstring(&buf)) * 100) + 0.5);
5195 buf_free(&buf);
5196 return r;
5197 }
5198
message_get_field(message_t * m,const char * hdr,int flags,struct buf * buf)5199 EXPORTED int message_get_field(message_t *m, const char *hdr, int flags, struct buf *buf)
5200 {
5201 strarray_t want = STRARRAY_INITIALIZER;
5202 struct buf raw = BUF_INITIALIZER;
5203 int hasname = 1;
5204 int isutf8 = 0;
5205
5206 if (!strcasecmp(hdr, "rawheaders")) {
5207 int r = message_need(m, M_MAP|M_RECORD);
5208 if (r) return r;
5209 buf_setmap(buf, m->map.s, m->record.header_size);
5210 return 0;
5211 }
5212
5213 if (!strcasecmp(hdr, "rawbody")) {
5214 int r = message_need(m, M_MAP|M_RECORD);
5215 if (r) return r;
5216 buf_setmap(buf, m->map.s + m->record.header_size, m->record.size - m->record.header_size);
5217 return 0;
5218 }
5219
5220 if (!(flags & MESSAGE_APPEND))
5221 buf_reset(buf);
5222
5223 /* Attempt to read field from the least-cost source available */
5224 int found_field = 0;
5225
5226 /* the 5 standalone cache fields */
5227 if (!strcasecmp(hdr, "from")) {
5228 int r = message_need(m, M_CACHE);
5229 if (!r) {
5230 buf_setmap(&raw, cacheitem_base(&m->record, CACHE_FROM),
5231 cacheitem_size(&m->record, CACHE_FROM));
5232 if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5233 buf_reset(&raw);
5234 hasname = 0;
5235 isutf8 = 1;
5236 found_field = 1;
5237 } else if (r != IMAP_NOTFOUND) return r;
5238 }
5239 else if (!strcasecmp(hdr, "to")) {
5240 int r = message_need(m, M_CACHE);
5241 if (!r) {
5242 buf_setmap(&raw, cacheitem_base(&m->record, CACHE_TO),
5243 cacheitem_size(&m->record, CACHE_TO));
5244 if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5245 buf_reset(&raw);
5246 hasname = 0;
5247 isutf8 = 1;
5248 found_field = 1;
5249 } else if (r != IMAP_NOTFOUND) return r;
5250 }
5251 else if (!strcasecmp(hdr, "cc")) {
5252 int r = message_need(m, M_CACHE);
5253 if (!r) {
5254 buf_setmap(&raw, cacheitem_base(&m->record, CACHE_CC),
5255 cacheitem_size(&m->record, CACHE_CC));
5256 if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5257 buf_reset(&raw);
5258 hasname = 0;
5259 isutf8 = 1;
5260 found_field = 1;
5261 } else if (r != IMAP_NOTFOUND) return r;
5262 }
5263 else if (!strcasecmp(hdr, "bcc")) {
5264 int r = message_need(m, M_CACHE);
5265 if (!r) {
5266 buf_setmap(&raw, cacheitem_base(&m->record, CACHE_BCC),
5267 cacheitem_size(&m->record, CACHE_BCC));
5268 if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5269 buf_reset(&raw);
5270 hasname = 0;
5271 isutf8 = 1;
5272 found_field = 1;
5273 } else if (r != IMAP_NOTFOUND) return r;
5274 }
5275 else if (!strcasecmp(hdr, "subject")) {
5276 int r = message_need(m, M_CACHE);
5277 if (!r) {
5278 message1_get_subject(&m->record, &raw);
5279 hasname = 0;
5280 isutf8 = 1;
5281 found_field = 1;
5282 } else if (r != IMAP_NOTFOUND) return r;
5283 }
5284
5285 /* message-id is from the envelope */
5286 else if (!strcasecmp(hdr, "message-id")) {
5287 char *envtokens[NUMENVTOKENS];
5288 char *c_env;
5289 int r = message_need(m, M_CACHE);
5290 if (!r) {
5291 c_env = xstrndup(cacheitem_base(&m->record, CACHE_ENVELOPE) + 1,
5292 cacheitem_size(&m->record, CACHE_ENVELOPE) - 2);
5293 parse_cached_envelope(c_env, envtokens, NUMENVTOKENS);
5294 if (envtokens[ENV_MSGID])
5295 buf_appendcstr(&raw, envtokens[ENV_MSGID]);
5296 free(c_env);
5297 if (raw.len == 3 && raw.s[0] == 'N' && raw.s[1] == 'I' && raw.s[2] == 'L')
5298 buf_reset(&raw);
5299 hasname = 0;
5300 found_field = 1;
5301 } else if (r != IMAP_NOTFOUND) return r;
5302 }
5303 else {
5304 int r = message_need(m, M_RECORD);
5305 unsigned cache_version = mailbox_cached_header(hdr);
5306 if (!r && m->record.cache_version >= cache_version) {
5307 /* it's in the cache */
5308 char *headers = NULL;
5309 int r = message_need(m, M_CACHE);
5310 if (r) return r;
5311 headers = xstrndup(cacheitem_base(&m->record, CACHE_HEADERS),
5312 cacheitem_size(&m->record, CACHE_HEADERS));
5313 strarray_append(&want, hdr);
5314 message_pruneheader(headers, &want, NULL);
5315 buf_appendcstr(&raw, headers);
5316 free(headers);
5317 hasname = 1;
5318 found_field = 1;
5319 } else if (r && r != IMAP_NOTFOUND) return r;
5320 }
5321
5322 if (!found_field) {
5323 /* fall back to read field from raw headers */
5324 char *headers = NULL;
5325 int r = message_need(m, M_MAP|M_CACHEBODY);
5326 if (r) return r;
5327 headers = xstrndup(m->map.s + m->body->header_offset, m->body->header_size);
5328 strarray_append(&want, hdr);
5329 message_pruneheader(headers, &want, NULL);
5330 buf_appendcstr(&raw, headers);
5331 free(headers);
5332 hasname = 1;
5333 found_field = 1;
5334 }
5335
5336 if (raw.len)
5337 extract_one(buf, hdr, flags, hasname, isutf8, &raw);
5338
5339 buf_free(&raw);
5340 strarray_fini(&want);
5341
5342 return 0;
5343 }
5344
message_foreach_header(const char * headers,size_t len,int (* cb)(const char *,const char *,void *),void * rock)5345 EXPORTED int message_foreach_header(const char *headers, size_t len,
5346 int(*cb)(const char*, const char*, void*),
5347 void *rock)
5348 {
5349 struct buf key = BUF_INITIALIZER;
5350 struct buf val = BUF_INITIALIZER;
5351 const char *top = headers + len;
5352 const char *hdr = headers;
5353 int r = 0;
5354
5355 while (hdr < top) {
5356 /* Look for colon separating header name from value */
5357 const char *p = memchr(hdr, ':', top - hdr);
5358 if (!p) {
5359 r = IMAP_INTERNAL;
5360 goto done;
5361 }
5362 buf_setmap(&key, hdr, p - hdr);
5363 p++;
5364 /* Extract raw header value, skipping over folding CRLF */
5365 const char *q = p;
5366 while (q < top && (q = memchr(q, '\n', top - q))) {
5367 if ((++q == top) || (*q != ' ' && *q != '\t'))
5368 break;
5369 }
5370 if (!q) q = top;
5371 /* Chomp of trailing CRLF */
5372 buf_setmap(&val, p, q - p >= 2 ? q - p - 2 : 0);
5373 /* Call callback for header */
5374 r = cb(buf_cstring(&key), buf_cstring(&val), rock);
5375 if (r) break;
5376 /* Prepare next iteration */
5377 buf_reset(&key);
5378 buf_reset(&val);
5379 hdr = q;
5380 }
5381
5382 done:
5383 buf_free(&key);
5384 buf_free(&val);
5385 return r;
5386 }
5387
message_get_type(message_t * m,const char ** strp)5388 EXPORTED int message_get_type(message_t *m, const char **strp)
5389 {
5390 int r = message_need(m, M_CACHEBODY);
5391 if (r) return r;
5392 *strp = m->body->type;
5393 return 0;
5394 }
5395
message_get_subtype(message_t * m,const char ** strp)5396 EXPORTED int message_get_subtype(message_t *m, const char **strp)
5397 {
5398 int r = message_need(m, M_CACHEBODY);
5399 if (r) return r;
5400 *strp = m->body->subtype;
5401 return 0;
5402 }
5403
message_get_encoding(message_t * m,int * encp)5404 EXPORTED int message_get_encoding(message_t *m, int *encp)
5405 {
5406 int r = message_need(m, M_CACHEBODY);
5407 if (r) return r;
5408 *encp = m->body->charset_enc;
5409 return 0;
5410 }
5411
message_get_charset_id(message_t * m,const char ** strp)5412 EXPORTED int message_get_charset_id(message_t *m, const char **strp)
5413 {
5414 int r = message_need(m, M_CACHEBODY);
5415 if (r) return r;
5416 *strp = m->body->charset_id;
5417 return 0;
5418 }
5419
message_get_cachebody(message_t * m,const struct body ** bodyp)5420 EXPORTED int message_get_cachebody(message_t *m, const struct body **bodyp)
5421 {
5422 int r = message_need(m, M_CACHEBODY);
5423 if (r) return r;
5424 *bodyp = m->body;
5425 return 0;
5426 }
5427
message_get_body(message_t * m,struct buf * buf)5428 EXPORTED int message_get_body(message_t *m, struct buf *buf)
5429 {
5430 return message_get_field(m, "rawbody", MESSAGE_RAW, buf);
5431 }
5432
message_get_headers(message_t * m,struct buf * buf)5433 EXPORTED int message_get_headers(message_t *m, struct buf *buf)
5434 {
5435 return message_get_field(m, "rawheaders", MESSAGE_RAW, buf);
5436 }
5437