1 /*
2 mairix - message index builder and finder for maildir folders.
3
4 **********************************************************************
5 * Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006,2007,2010
6 * rfc2047 decode:
7 * Copyright (C) Mikael Ylikoski 2002
8 * gzip mbox support:
9 * Copyright (C) Ico Doornekamp 2005
10 * Copyright (C) Felipe Gustavo de Almeida 2005
11 * bzip2 mbox support:
12 * Copyright (C) Paramjit Oberoi 2005
13 * caching uncompressed mbox data:
14 * Copyright (C) Chris Mason 2006
15 * memory leak fixes:
16 * Copyright (C) Samuel Tardieu 2008
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of version 2 of the GNU General Public License as
20 * published by the Free Software Foundation.
21 *
22 * This program is distributed in the hope that it will be useful, but
23 * WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 * General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License along
28 * with this program; if not, write to the Free Software Foundation, Inc.,
29 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 *
31 **********************************************************************
32 */
33
34 #include "mairix.h"
35 #include "nvp.h"
36
37 #include <assert.h>
38 #include <ctype.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <sys/mman.h>
44 #ifdef USE_GZIP_MBOX
45 # include <zlib.h>
46 #endif
47 #ifdef USE_BZIP_MBOX
48 # include <bzlib.h>
49 #endif
50
/* Generic doubly-linked list header.  enqueue() views both the list head and
 * the element being added through this type, so any structure that is to be
 * queued must carry its next and prev pointers as its first two members, in
 * this order. */
struct DLL {/*{{{*/
  struct DLL *next; /* following node (or the head, at the tail of the list) */
  struct DLL *prev; /* preceding node (or the head, at the front of the list) */
};
55 /*}}}*/
enqueue(void * head,void * x)56 static void enqueue(void *head, void *x)/*{{{*/
57 {
58 /* Declare this way so it can be used with any kind of double linked list
59 * having next & prev pointers in its first two words. */
60 struct DLL *h = (struct DLL *) head;
61 struct DLL *xx = (struct DLL *) x;
62 xx->next = h;
63 xx->prev = h->prev;
64 h->prev->next = xx;
65 h->prev = xx;
66 return;
67 }
68 /*}}}*/
69
/* Content-Transfer-Encoding values, as classified by decode_encoding_type()
 * and acted upon by unencode_data(). */
enum encoding_type {/*{{{*/
  ENC_UNKNOWN,          /* header present but its value was not recognised */
  ENC_NONE,             /* no encoding string was supplied at all */
  ENC_BINARY,           /* "binary": copied through unchanged */
  ENC_7BIT,             /* "7bit" (also "7-bit", "7 bit"): copied through unchanged */
  ENC_8BIT,             /* "8bit" (also "8-bit", "8 bit"): copied through unchanged */
  ENC_QUOTED_PRINTABLE, /* "quoted-printable" */
  ENC_BASE64,           /* "base64" */
  ENC_UUENCODE          /* "x-uuencode" */
};
80 /*}}}*/
/* The pieces of a Content-Type header that this file acts on.  Filled in by
 * parse_content_type() with pointers obtained from the nvp accessors; any
 * member may be NULL when the corresponding piece is absent. */
struct content_type_header {/*{{{*/
  const char *major; /* e.g. text */
  const char *minor; /* e.g. plain */
  const char *boundary; /* for multipart */
  /* charset? */
};
87 /*}}}*/
/* One header line held in a circular doubly-linked list.  next and prev must
 * remain the first two members so that enqueue() can treat the node as a
 * struct DLL.  The list head is a struct line used only as a sentinel: its
 * text member is never set. */
struct line {/*{{{*/
  struct line *next;
  struct line *prev;
  char *text; /* heap-allocated, NUL-terminated, newline removed */
};
93 /*}}}*/
94
init_headers(struct headers * hdrs)95 static void init_headers(struct headers *hdrs)/*{{{*/
96 {
97 hdrs->to = NULL;
98 hdrs->cc = NULL;
99 hdrs->from = NULL;
100 hdrs->subject = NULL;
101 hdrs->message_id = NULL;
102 hdrs->in_reply_to = NULL;
103 hdrs->references = NULL;
104 hdrs->date = 0;
105 hdrs->flags.seen = 0;
106 hdrs->flags.replied = 0;
107 hdrs->flags.flagged = 0;
108 };
109 /*}}}*/
splice_header_lines(struct line * header)110 static void splice_header_lines(struct line *header)/*{{{*/
111 {
112 /* Deal with newline then tab in header */
113 struct line *x, *next;
114 for (x=header->next; x!=header; x=next) {
115 #if 0
116 printf("next header, x->text=%08lx\n", x->text);
117 printf("header=<%s>\n", x->text);
118 #endif
119 next = x->next;
120 if (isspace(x->text[0] & 0xff)) {
121 /* Glue to previous line */
122 char *p, *newbuf, *oldbuf;
123 struct line *y;
124 for (p=x->text; *p; p++) {
125 if (!isspace(*(unsigned char *)p)) break;
126 }
127 p--; /* point to final space */
128 y = x->prev;
129 #if 0
130 printf("y=%08lx p=%08lx\n", y->text, p);
131 #endif
132 newbuf = new_array(char, strlen(y->text) + strlen(p) + 1);
133 strcpy(newbuf, y->text);
134 strcat(newbuf, p);
135 oldbuf = y->text;
136 y->text = newbuf;
137 free(oldbuf);
138 y->next = x->next;
139 x->next->prev = y;
140 free(x->text);
141 free(x);
142 }
143 }
144 return;
145 }
146 /*}}}*/
audit_header(struct line * header)147 static int audit_header(struct line *header)/*{{{*/
148 {
149 /* Check for obvious broken-ness
150 * 1st line has no leading spaces, single word then colon
151 * following lines have leading spaces or single word followed by colon
152 * */
153 struct line *x;
154 int first=1;
155 int count=1;
156 for (x=header->next; x!=header; x=x->next) {
157 int has_leading_space=0;
158 int is_blank;
159 int has_word_colon=0;
160
161 if (1 || first) {
162 /* Ignore any UUCP or mbox style From line at the start */
163 if (!strncmp("From ", x->text, 5)) {
164 continue;
165 }
166 /* Ignore escaped From line at the start */
167 if (!strncmp(">From ", x->text, 6)) {
168 continue;
169 }
170 }
171
172 is_blank = !(x->text[0]);
173 if (!is_blank) {
174 char *p;
175 int saw_char = 0;
176 has_leading_space = isspace(x->text[0] & 0xff);
177 has_word_colon = 0; /* default */
178 p = x->text;
179 while(*p) {
180 if(*p == ':') {
181 has_word_colon = saw_char;
182 break;
183 } else if (isspace(*(unsigned char *) p)) {
184 has_word_colon = 0;
185 break;
186 } else {
187 saw_char = 1;
188 }
189 p++;
190 }
191 }
192
193 if (( first && (is_blank || has_leading_space || !has_word_colon)) ||
194 (!first && (is_blank || !(has_leading_space || has_word_colon)))) {
195 #if 0
196 fprintf(stderr, "Header line %d <%s> fails because:", count, x->text);
197 if (first && is_blank) { fprintf(stderr, " [first && is_blank]"); }
198 if (first && has_leading_space) { fprintf(stderr, " [first && has_leading_space]"); }
199 if (first && !has_word_colon) { fprintf(stderr, " [first && !has_word_colon]"); }
200 if (!first && is_blank) { fprintf(stderr, " [!first && is_blank]"); }
201 if (!first && !(has_leading_space||has_word_colon)) { fprintf(stderr, " [!first && !has_leading_space||has_word_colon]"); }
202 fprintf(stderr, "\n");
203 #endif
204 /* Header fails the audit */
205 return 0;
206 }
207 first = 0;
208 count++;
209 }
210 /* If we get here the header must have been OK */
211 return 1;
212 }/*}}}*/
static int match_string(const char *ref, const char *candidate)/*{{{*/
{
  /* Case-insensitive prefix test: returns non-zero when candidate begins
   * with ref.  The length is kept as size_t, which is what strlen() yields
   * and what strncasecmp() takes, instead of being narrowed through an int. */
  size_t len = strlen(ref);
  return !strncasecmp(ref, candidate, len);
}
218 /*}}}*/
219
/* Indexed by byte value: 1 for '=' (the base64 padding character), 0 for
 * every other byte.  The base64 decoders add the entry for each input byte
 * to their count of padding characters seen. */
static char equal_table[256] = {/*{{{*/
  ['='] = 1
};
238 /*}}}*/
/* Indexed by byte value: the 6-bit value of a base64 alphabet character, or
 * -1 for a byte that is not part of the alphabet (the decoders skip those).
 * '=' maps to 0, so a padding character still occupies a sextet in the
 * decoder's register; equal_table is what tells padding apart from 'A'. */
#define XX (-1)
static int base64_table[] = {/*{{{*/
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* 00-0f */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* 10-1f */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, 62, XX, XX, XX, 63, /* 20-2f  + / */
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, XX, XX, XX,  0, XX, XX, /* 30-3f  0-9 = */
  XX,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 40-4f  A-O */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, XX, XX, XX, XX, XX, /* 50-5f  P-Z */
  XX, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 60-6f  a-o */
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, XX, XX, XX, XX, XX, /* 70-7f  p-z */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* 80-8f */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* 90-9f */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* a0-af */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* b0-bf */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* c0-cf */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* d0-df */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, /* e0-ef */
  XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX  /* f0-ff */
};
#undef XX
257 /*}}}*/
static int hex_to_val(char x) {/*{{{*/
  /* Value of a single hexadecimal digit, upper or lower case.  Anything
   * that is not a hex digit yields 0. */
  if (x >= '0' && x <= '9') {
    return x - '0';
  }
  if (x >= 'a' && x <= 'f') {
    return 10 + (x - 'a');
  }
  if (x >= 'A' && x <= 'F') {
    return 10 + (x - 'A');
  }
  return 0;
}
292 /*}}}*/
decode_header_value(char * text)293 static void decode_header_value(char *text){/*{{{*/
294 /* rfc2047 decode, written by Mikael Ylikoski */
295
296 char *s, *a, *b, *e, *p, *q;
297
298 for (p = q = s = text; (s = strstr(s, "=?")); s = e + 2) {
299 if (p == q)
300 p = q = s;
301 else
302 while (q != s)
303 *p++ = *q++;
304 s += 2;
305 a = strchr(s, '?');
306 if (!a) break;
307 a++;
308 b = strchr(a, '?');
309 if (!b) break;
310 b++;
311 e = strstr(b, "?=");
312 if (!e) break;
313 /* have found an encoded-word */
314 if (b - a != 2)
315 continue; /* unknown encoding */
316 if (*a == 'q' || *a == 'Q') {
317 int val;
318 q = b;
319 while (q < e) {
320 if (*q == '_') {
321 *p++ = 0x20;
322 q++;
323 } else if (*q == '=') {
324 q++;
325 val = hex_to_val(*q++) << 4;
326 val += hex_to_val(*q++);
327 *p++ = val;
328 } else
329 *p++ = *q++;
330 }
331 } else if (*a == 'b' || *a == 'B') {
332 int reg, nc, eq; /* register, #characters in reg, #equals */
333 int dc; /* decoded character */
334 eq = reg = nc = 0;
335 for (q = b; q < e; q++) {
336 unsigned char cq = *(unsigned char *)q;
337 dc = base64_table[cq];
338 eq += equal_table[cq];
339
340 if (dc >= 0) {
341 reg <<= 6;
342 reg += dc;
343 nc++;
344 if (nc == 4) {
345 *p++ = ((reg >> 16) & 0xff);
346 if (eq < 2) *p++ = ((reg >> 8) & 0xff);
347 if (eq < 1) *p++ = reg & 0xff;
348 nc = reg = 0;
349 if (eq) break;
350 }
351 }
352 }
353 } else {
354 continue; /* unknown encoding */
355 }
356 q = e + 2;
357 }
358 if (p == q) return;
359 while (*q != '\0')
360 *p++ = *q++;
361 *p = '\0';
362 }
363 /*}}}*/
static char *copy_header_value(char *text){/*{{{*/
  /* Return a new string holding everything after the first ':' of a header
   * line, with rfc2047 encoded-words decoded, or NULL when the line has no
   * colon. */
  char *colon = strchr(text, ':');
  char *value;

  if (!colon) return NULL;
  value = new_string(colon + 1);
  decode_header_value(value);
  return value;
}
373 /*}}}*/
static void copy_or_concat_header_value(char **previous, char *text){/*{{{*/
  /* Record the value of header line text in *previous.  The first value is
   * stored as is; the value of each further occurrence of the same header
   * is appended to it, separated by ", ".
   *
   * copy_header_value() returns NULL for a line without a colon.  In that
   * case *previous is left untouched, so a NULL is never handed on to
   * extend_string(). */
  char *p = copy_header_value(text);
  if (!p) return;

  if (*previous)
  {
    *previous = extend_string(*previous, ", ");
    *previous = extend_string(*previous, p);
    free(p);
  }
  else
    *previous = p;
}
385 /*}}}*/
decode_encoding_type(const char * e)386 static enum encoding_type decode_encoding_type(const char *e)/*{{{*/
387 {
388 enum encoding_type result;
389 const char *p;
390 if (!e) {
391 result = ENC_NONE;
392 } else {
393 for (p=e; *p && isspace(*(unsigned char *)p); p++) ;
394 if ( match_string("7bit", p)
395 || match_string("7-bit", p)
396 || match_string("7 bit", p)) {
397 result = ENC_7BIT;
398 } else if (match_string("8bit", p)
399 || match_string("8-bit", p)
400 || match_string("8 bit", p)) {
401 result = ENC_8BIT;
402 } else if (match_string("quoted-printable", p)) {
403 result = ENC_QUOTED_PRINTABLE;
404 } else if (match_string("base64", p)) {
405 result = ENC_BASE64;
406 } else if (match_string("binary", p)) {
407 result = ENC_BINARY;
408 } else if (match_string("x-uuencode", p)) {
409 result = ENC_UUENCODE;
410 } else {
411 fprintf(stderr, "Warning: unknown encoding type: '%s'\n", e);
412 result = ENC_UNKNOWN;
413 }
414 }
415 return result;
416 }
417 /*}}}*/
parse_content_type(struct nvp * ct_nvp,struct content_type_header * result)418 static void parse_content_type(struct nvp *ct_nvp, struct content_type_header *result)/*{{{*/
419 {
420 result->major = NULL;
421 result->minor = NULL;
422 result->boundary = NULL;
423
424 result->major = nvp_major(ct_nvp);
425 if (result->major) {
426 result->minor = nvp_minor(ct_nvp);
427 } else {
428 result->minor = NULL;
429 result->major = nvp_first(ct_nvp);
430 }
431
432 result->boundary = nvp_lookupcase(ct_nvp, "boundary");
433 }
434
435 /*}}}*/
static char *looking_at_ws_then_newline(char *start)/*{{{*/
{
  /* If start is followed by nothing but whitespace up to a newline, return
   * a pointer to that newline.  Return NULL as soon as any other character
   * is met, which includes the terminating NUL. */
  char *cursor;

  for (cursor = start; *cursor != '\n'; cursor++) {
    if (!isspace(*(unsigned char *) cursor)) {
      return NULL;
    }
  }
  return cursor;
}
449 /*}}}*/
450
unencode_data(struct msg_src * src,char * input,int input_len,const char * enc,int * output_len)451 static char *unencode_data(struct msg_src *src, char *input, int input_len, const char *enc, int *output_len)/*{{{*/
452 {
453 enum encoding_type encoding;
454 char *result, *end_result;
455 char *end_input;
456
457 encoding = decode_encoding_type(enc);
458 end_input = input + input_len;
459
460 /* All mime encodings result in expanded data, so this is guaranteed to
461 * safely oversize the output array */
462 result = new_array(char, input_len + 1);
463
464 /* Now decode */
465 switch (encoding) {
466 case ENC_7BIT:/*{{{*/
467 case ENC_8BIT:
468 case ENC_BINARY:
469 case ENC_NONE:
470 {
471 memcpy(result, input, input_len);
472 end_result = result + input_len;
473 }
474 break;
475 /*}}}*/
476 case ENC_QUOTED_PRINTABLE:/*{{{*/
477 {
478 char *p, *q;
479 p = result;
480 for (p=result, q=input;
481 q<end_input; ) {
482
483 if (*q == '=') {
484 /* followed by optional whitespace then \n? discard them. */
485 char *r;
486 int val;
487 q++;
488 r = looking_at_ws_then_newline(q);
489 if (r) {
490 q = r + 1; /* Point into next line */
491 continue;
492 }
493 /* not that case. */
494 val = hex_to_val(*q++) << 4;
495 val += hex_to_val(*q++);
496 *p++ = val;
497
498 } else {
499 /* Normal character */
500 *p++ = *q++;
501 }
502 }
503 end_result = p;
504 }
505 break;
506 /*}}}*/
507 case ENC_BASE64:/*{{{*/
508 {
509 char *p, *q;
510 int reg, nc, eq; /* register, #characters in reg, #equals */
511 int dc; /* decoded character */
512 eq = reg = nc = 0;
513 for (q=input, p=result; q<end_input; q++) {
514 unsigned char cq = * (unsigned char *)q;
515 /* Might want a 256 entry array instead of this sub-optimal mess
516 * eventually. */
517 dc = base64_table[cq];
518 eq += equal_table[cq];
519
520 if (dc >= 0) {
521 reg <<= 6;
522 reg += dc;
523 nc++;
524 if (nc == 4) {
525 *p++ = ((reg >> 16) & 0xff);
526 if (eq < 2) *p++ = ((reg >> 8) & 0xff);
527 if (eq < 1) *p++ = reg & 0xff;
528 nc = reg = 0;
529 if (eq) goto done_base_64;
530 }
531 }
532 }
533 done_base_64:
534 end_result = p;
535 }
536 break;
537 /*}}}*/
538 case ENC_UUENCODE:/*{{{*/
539 {
540 char *p, *q;
541 /* Find 'begin ' */
542 for (q = input; q < end_input - 6 && memcmp(q, "begin ", 6); q++)
543 ;
544 q += 6;
545 /* skip to EOL */
546 while (q < end_input && *q != '\n')
547 q++;
548 p = result;
549 while (q < end_input) { /* process line */
550 #define DEC(c) (((c) - ' ') & 077)
551 int len = DEC(*q++);
552 if (len == 0)
553 break;
554 for (; len > 0; q += 4, len -= 3) {
555 if (len >= 3) {
556 *p++ = DEC(q[0]) << 2 | DEC(q[1]) >> 4;
557 *p++ = DEC(q[1]) << 4 | DEC(q[2]) >> 2;
558 *p++ = DEC(q[2]) << 6 | DEC(q[3]);
559 } else {
560 if (len >= 1)
561 *p++ = DEC(q[0]) << 2 | DEC(q[1]) >> 4;
562 if (len >= 2)
563 *p++ = DEC(q[1]) << 4 | DEC(q[2]) >> 2;
564 }
565 }
566 while (q < end_input && *q != '\n')
567 q++;
568 }
569 end_result = p;
570 }
571 break;
572 /*}}}*/
573 case ENC_UNKNOWN:/*{{{*/
574 fprintf(stderr, "Unknown encoding type in %s\n", format_msg_src(src));
575 /* fall through - ignore this data */
576 /*}}}*/
577 default:/*{{{*/
578 end_result = result;
579 break;
580 /*}}}*/
581 }
582 *output_len = end_result - result;
583 result[*output_len] = '\0'; /* for convenience with text/plain etc to make it printable */
584 return result;
585 }
586 /*}}}*/
format_msg_src(struct msg_src * src)587 char *format_msg_src(struct msg_src *src)/*{{{*/
588 {
589 static char *buffer = NULL;
590 static int buffer_len = 0;
591 char *result;
592 int len;
593 switch (src->type) {
594 case MS_FILE:
595 result = src->filename;
596 break;
597 case MS_MBOX:
598 len = strlen(src->filename);
599 len += 32;
600 if (!buffer || (len > buffer_len)) {
601 free(buffer);
602 buffer = new_array(char, len);
603 buffer_len = len;
604 }
605 sprintf(buffer, "%s[%d,%d)", src->filename,
606 (int) src->start, (int) (src->start + src->len));
607 result = buffer;
608 break;
609 default:
610 result = NULL;
611 break;
612 }
613 return result;
614 }
615 /*}}}*/
split_and_splice_header(struct msg_src * src,char * data,struct line * header,char ** body_start)616 static int split_and_splice_header(struct msg_src *src, char *data, struct line *header, char **body_start)/*{{{*/
617 {
618 char *sol, *eol;
619 int blank_line;
620 header->next = header->prev = header;
621 sol = data;
622 do {
623 if (!*sol) break;
624 blank_line = 1; /* until proven otherwise */
625 eol = sol;
626 while (*eol && (*eol != '\n')) {
627 if (!isspace(*(unsigned char *) eol)) blank_line = 0;
628 eol++;
629 }
630 if (*eol == '\n') {
631 if (!blank_line) {
632 int line_length = eol - sol;
633 char *line_text = new_array(char, 1 + line_length);
634 struct line *new_header;
635
636 strncpy(line_text, sol, line_length);
637 line_text[line_length] = '\0';
638 new_header = new(struct line);
639 new_header->text = line_text;
640 enqueue(header, new_header);
641 }
642 sol = eol + 1; /* Start of next line */
643 } else { /* must be null char */
644 fprintf(stderr, "Got null character whilst processing header of %s\n",
645 format_msg_src(src));
646 return -1; /* & leak memory */
647 }
648 } while (!blank_line);
649
650 *body_start = sol;
651
652 if (audit_header(header)) {
653 splice_header_lines(header);
654 return 0;
655 } else {
656 #if 0
657 /* Caller generates message */
658 fprintf(stderr, "Message had bad rfc822 headers, ignoring\n");
659 #endif
660 return -1;
661 }
662 }
663 /*}}}*/
664
665 /* Forward prototypes */
/* Declared ahead of its definition because do_body() calls do_multipart()
 * and is defined first. */
static void do_multipart(struct msg_src *src, char *input, int input_len,
    const char *boundary, struct attachment *atts,
    enum data_to_rfc822_error *error);
669
670 /*{{{ do_body() */
do_body(struct msg_src * src,char * body_start,int body_len,struct nvp * ct_nvp,struct nvp * cte_nvp,struct nvp * cd_nvp,struct attachment * atts,enum data_to_rfc822_error * error)671 static void do_body(struct msg_src *src,
672 char *body_start, int body_len,
673 struct nvp *ct_nvp, struct nvp *cte_nvp,
674 struct nvp *cd_nvp,
675 struct attachment *atts,
676 enum data_to_rfc822_error *error)
677 {
678 char *decoded_body;
679 int decoded_body_len;
680 const char *content_transfer_encoding;
681 content_transfer_encoding = NULL;
682 if (cte_nvp) {
683 content_transfer_encoding = nvp_first(cte_nvp);
684 if (!content_transfer_encoding) {
685 fprintf(stderr, "Giving up on %s, content_transfer_encoding header not parseable\n",
686 format_msg_src(src));
687 return;
688 }
689 }
690
691 decoded_body = unencode_data(src, body_start, body_len, content_transfer_encoding, &decoded_body_len);
692
693 if (ct_nvp) {
694 struct content_type_header ct;
695 parse_content_type(ct_nvp, &ct);
696 if (ct.major && !strcasecmp(ct.major, "multipart")) {
697 do_multipart(src, decoded_body, decoded_body_len, ct.boundary, atts, error);
698 /* Don't need decoded body any longer - copies have been taken if
699 * required when handling multipart attachments. */
700 free(decoded_body);
701 if (error && (*error == DTR8_MISSING_END)) return;
702 } else {
703 /* unipart */
704 struct attachment *new_att;
705 const char *disposition;
706 new_att = new(struct attachment);
707 disposition = cd_nvp ? nvp_first(cd_nvp) : NULL;
708 if (disposition && !strcasecmp(disposition, "attachment")) {
709 const char *lookup;
710 lookup = nvp_lookupcase(cd_nvp, "filename");
711 if (lookup) {
712 new_att->filename = new_string(lookup);
713 } else {
714 /* Some messages have name=... in content-type: instead of
715 * filename=... in content-disposition. */
716 lookup = nvp_lookup(ct_nvp, "name");
717 if (lookup) {
718 new_att->filename = new_string(lookup);
719 } else {
720 new_att->filename = NULL;
721 }
722 }
723 } else {
724 new_att->filename = NULL;
725 }
726 if (ct.major && !strcasecmp(ct.major, "text")) {
727 if (ct.minor && !strcasecmp(ct.minor, "plain")) {
728 new_att->ct = CT_TEXT_PLAIN;
729 } else if (ct.minor && !strcasecmp(ct.minor, "html")) {
730 new_att->ct = CT_TEXT_HTML;
731 } else {
732 new_att->ct = CT_TEXT_OTHER;
733 }
734 } else if (ct.major && !strcasecmp(ct.major, "message") &&
735 ct.minor && !strcasecmp(ct.minor, "rfc822")) {
736 new_att->ct = CT_MESSAGE_RFC822;
737 } else {
738 new_att->ct = CT_OTHER;
739 }
740
741 if (new_att->ct == CT_MESSAGE_RFC822) {
742 new_att->data.rfc822 = data_to_rfc822(src, decoded_body, decoded_body_len, error);
743 free(decoded_body); /* data no longer needed */
744 } else {
745 new_att->data.normal.len = decoded_body_len;
746 new_att->data.normal.bytes = decoded_body;
747 }
748 enqueue(atts, new_att);
749 }
750 } else {
751 /* Treat as text/plain {{{*/
752 struct attachment *new_att;
753 new_att = new(struct attachment);
754 new_att->filename = NULL;
755 new_att->ct = CT_TEXT_PLAIN;
756 new_att->data.normal.len = decoded_body_len;
757 /* Add null termination on the end */
758 new_att->data.normal.bytes = new_array(char, decoded_body_len + 1);
759 memcpy(new_att->data.normal.bytes, decoded_body, decoded_body_len + 1);
760 free(decoded_body);
761 enqueue(atts, new_att);/*}}}*/
762 }
763 }
764 /*}}}*/
765 /*{{{ do_attachment() */
do_attachment(struct msg_src * src,char * start,char * after_end,struct attachment * atts)766 static void do_attachment(struct msg_src *src,
767 char *start, char *after_end,
768 struct attachment *atts)
769 {
770 /* decode attachment and add to attachment list */
771 struct line header, *x, *nx;
772 char *body_start;
773 int body_len;
774
775 struct nvp *ct_nvp, *cte_nvp, *cd_nvp, *nvp;
776
777 if (split_and_splice_header(src, start, &header, &body_start) < 0) {
778 fprintf(stderr, "Giving up on attachment with bad header in %s\n",
779 format_msg_src(src));
780 return;
781 }
782
783 /* Extract key headers */
784 ct_nvp = cte_nvp = cd_nvp = NULL;
785 for (x=header.next; x!=&header; x=x->next) {
786 if ((nvp = make_nvp(src, x->text, "content-type:"))) {
787 ct_nvp = nvp;
788 } else if ((nvp = make_nvp(src, x->text, "content-transfer-encoding:"))) {
789 cte_nvp = nvp;
790 } else if ((nvp = make_nvp(src, x->text, "content-disposition:"))) {
791 cd_nvp = nvp;
792 }
793 }
794
795 #if 0
796 if (ct_nvp) {
797 fprintf(stderr, "======\n");
798 fprintf(stderr, "Dump of content-type hdr\n");
799 nvp_dump(ct_nvp, stderr);
800 free(ct_nvp);
801 }
802
803 if (cte_nvp) {
804 fprintf(stderr, "======\n");
805 fprintf(stderr, "Dump of content-transfer-encoding hdr\n");
806 nvp_dump(cte_nvp, stderr);
807 free(cte_nvp);
808 }
809 #endif
810
811 if (body_start > after_end) {
812 /* This is a (maliciously?) b0rken attachment, e.g. maybe empty */
813 if (verbose) {
814 fprintf(stderr, "Message %s contains an invalid attachment, length=%d bytes\n",
815 format_msg_src(src), (int)(after_end - start));
816 }
817 } else {
818 body_len = after_end - body_start;
819 /* Ignore errors in nested body parts. */
820 do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, atts, NULL);
821 }
822
823 /* Free header memory */
824 for (x=header.next; x!=&header; x=nx) {
825 nx = x->next;
826 free(x->text);
827 free(x);
828 }
829
830 if (ct_nvp) free_nvp(ct_nvp);
831 if (cte_nvp) free_nvp(cte_nvp);
832 if (cd_nvp) free_nvp(cd_nvp);
833 }
834 /*}}}*/
835 /*{{{ do_multipart() */
do_multipart(struct msg_src * src,char * input,int input_len,const char * boundary,struct attachment * atts,enum data_to_rfc822_error * error)836 static void do_multipart(struct msg_src *src,
837 char *input, int input_len,
838 const char *boundary,
839 struct attachment *atts,
840 enum data_to_rfc822_error *error)
841 {
842 char *b0, *b1, *be, *bx;
843 char *line_after_b0, *start_b1_search_from;
844 int boundary_len;
845 int looking_at_end_boundary;
846
847 if (!boundary) {
848 fprintf(stderr, "Can't process multipart message %s with no boundary string\n",
849 format_msg_src(src));
850 if (error) *error = DTR8_MULTIPART_SANS_BOUNDARY;
851 return;
852 }
853
854 boundary_len = strlen(boundary);
855
856 b0 = NULL;
857 line_after_b0 = input;
858 be = input + input_len;
859
860 do {
861 int boundary_ok;
862 start_b1_search_from = line_after_b0;
863 do {
864 /* reject boundaries that aren't a whole line */
865 b1 = NULL;
866 for (bx = start_b1_search_from; bx < be - (boundary_len + 2); bx++) {
867 if (bx[0] == '-' && bx[1] == '-' &&
868 !strncmp(bx+2, boundary, boundary_len)) {
869 b1 = bx;
870 break;
871 }
872 }
873 if (!b1) {
874 if (error)
875 *error = DTR8_MISSING_END;
876 return;
877 }
878
879 looking_at_end_boundary = (b1+boundary_len+3 < be) && (b1[boundary_len+2] == '-' &&
880 b1[boundary_len+3] == '-');
881 boundary_ok = 1;
882 if ((b1 > input) && (*(b1-1) != '\n'))
883 boundary_ok = 0;
884 if (!looking_at_end_boundary && (b1 + boundary_len + 3 < be) && !(
885 ((b1 + boundary_len + 2 < input + input_len) && (*(b1 + boundary_len + 2) == '\n')) ||
886 ((b1 + boundary_len + 3 < input + input_len) && (*(b1 + boundary_len + 2) == '\r') && (*(b1 + boundary_len + 3) == '\n'))
887 ))
888 boundary_ok = 0;
889 if (!boundary_ok) {
890 char *eol = strchr(b1, '\n');
891 if (!eol) {
892 fprintf(stderr, "Oops, didn't find another normal boundary in %s\n",
893 format_msg_src(src));
894 return;
895 }
896 start_b1_search_from = 1 + eol;
897 }
898 } while (!boundary_ok);
899
900 /* b1 is now looking at a good boundary, which might be the final one */
901
902 if (b0) {
903 /* don't treat preamble as an attachment */
904 do_attachment(src, line_after_b0, b1, atts);
905 }
906
907 b0 = b1;
908 line_after_b0 = strchr(b0, '\n');
909 if (line_after_b0 == 0)
910 line_after_b0 = b0 + strlen(b0);
911 else
912 ++line_after_b0;
913 } while (b1 < be && !looking_at_end_boundary);
914 }
915 /*}}}*/
static time_t parse_rfc822_date(char *date_string)/*{{{*/
{
  /* Format [weekday ,] day-of-month month year hour:minute:second timezone.

     Some of the ideas, sanity checks etc taken from parse.c in the mutt
     sources, credit to Michael R. Elkins et al

     Only the day, month and year are used; the time of day and the timezone
     are ignored, although something must follow the year.  The result is
     midnight local time at the start of that day as given by mktime(), or
     (time_t) -1 when the string cannot be parsed.

     Two-digit years below 70 are taken as 20xx, years of 1900 and above
     have 1900 subtracted, anything else is used as given.
     */
  static const char *const months[12] = {
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec"
  };
  struct tm tm;
  const unsigned char *s; /* unsigned so the <ctype.h> calls are well defined */
  char *z;
  int i;

  /* Leave no member of tm indeterminate when mktime() reads it */
  memset(&tm, 0, sizeof(tm));

  z = strchr(date_string, ',');
  s = (const unsigned char *) (z ? z + 1 : date_string);
  while (*s && isspace(*s)) s++;
  /* Should now be looking at day number */
  if (!isdigit(*s)) goto tough_cheese;
  tm.tm_mday = atoi((const char *) s);
  if (tm.tm_mday > 31) goto tough_cheese;

  while (isdigit(*s)) s++;
  while (*s && isspace(*s)) s++;
  if (!*s) goto tough_cheese;
  for (i = 0; i < 12; i++) {
    if (!strncasecmp((const char *) s, months[i], 3)) break;
  }
  if (i == 12) goto tough_cheese;
  tm.tm_mon = i;

  /* Skip the rest of the month name.  The string may end right here, so
   * stop at the terminator instead of running past it. */
  while (*s && !isspace(*s)) s++;
  while (*s && isspace(*s)) s++;
  if (!isdigit(*s)) goto tough_cheese;
  tm.tm_year = atoi((const char *) s);
  if (tm.tm_year < 70) {
    tm.tm_year += 100;
  } else if (tm.tm_year >= 1900) {
    tm.tm_year -= 1900;
  }

  while (isdigit(*s)) s++;
  while (*s && isspace(*s)) s++;
  if (!*s) goto tough_cheese;

  /* Now looking at hms */
  /* For now, forget this.  The searching will be vague enough that nearest day is good enough. */

  tm.tm_hour = 0;
  tm.tm_min = 0;
  tm.tm_sec = 0;
  tm.tm_isdst = 0;
  return mktime(&tm);

tough_cheese:
  return (time_t) -1; /* default value */
}
978 /*}}}*/
979
scan_status_flags(const char * s,struct headers * hdrs)980 static void scan_status_flags(const char *s, struct headers *hdrs)/*{{{*/
981 {
982 const char *p;
983 for (p=s; *p; p++) {
984 switch (*p) {
985 case 'R': hdrs->flags.seen = 1; break;
986 case 'A': hdrs->flags.replied = 1; break;
987 case 'F': hdrs->flags.flagged = 1; break;
988 default: break;
989 }
990 }
991 }
992 /*}}}*/
993
994 /*{{{ data_to_rfc822() */
data_to_rfc822(struct msg_src * src,char * data,int length,enum data_to_rfc822_error * error)995 struct rfc822 *data_to_rfc822(struct msg_src *src,
996 char *data, int length,
997 enum data_to_rfc822_error *error)
998 {
999 struct rfc822 *result;
1000 char *body_start;
1001 struct line header;
1002 struct line *x, *nx;
1003 struct nvp *ct_nvp, *cte_nvp, *cd_nvp, *nvp;
1004 int body_len;
1005
1006 if (error) *error = DTR8_OK; /* default */
1007 result = new(struct rfc822);
1008 init_headers(&result->hdrs);
1009 result->atts.next = result->atts.prev = &result->atts;
1010
1011 if (split_and_splice_header(src, data, &header, &body_start) < 0) {
1012 if (verbose) {
1013 fprintf(stderr, "Giving up on message %s with bad header\n",
1014 format_msg_src(src));
1015 }
1016 if (error) *error = DTR8_BAD_HEADERS;
1017 return NULL;
1018 }
1019
1020 /* Extract key headers {{{*/
1021 ct_nvp = cte_nvp = cd_nvp = NULL;
1022 for (x=header.next; x!=&header; x=x->next) {
1023 if (match_string("to:", x->text))
1024 copy_or_concat_header_value(&result->hdrs.to, x->text);
1025 else if (match_string("cc:", x->text))
1026 copy_or_concat_header_value(&result->hdrs.cc, x->text);
1027 else if (!result->hdrs.from && match_string("from:", x->text))
1028 result->hdrs.from = copy_header_value(x->text);
1029 else if (!result->hdrs.subject && match_string("subject:", x->text))
1030 result->hdrs.subject = copy_header_value(x->text);
1031 else if (!ct_nvp && (nvp = make_nvp(src, x->text, "content-type:")))
1032 ct_nvp = nvp;
1033 else if (!cte_nvp && (nvp = make_nvp(src, x->text, "content-transfer-encoding:")))
1034 cte_nvp = nvp;
1035 else if (!cd_nvp && (nvp = make_nvp(src, x->text, "content-disposition:")))
1036 cd_nvp = nvp;
1037 else if (!result->hdrs.date && match_string("date:", x->text)) {
1038 char *date_string = copy_header_value(x->text);
1039 result->hdrs.date = parse_rfc822_date(date_string);
1040 free(date_string);
1041 } else if (!result->hdrs.message_id && match_string("message-id:", x->text))
1042 result->hdrs.message_id = copy_header_value(x->text);
1043 else if (!result->hdrs.in_reply_to && match_string("in-reply-to:", x->text))
1044 result->hdrs.in_reply_to = copy_header_value(x->text);
1045 else if (!result->hdrs.references && match_string("references:", x->text))
1046 result->hdrs.references = copy_header_value(x->text);
1047 else if (match_string("status:", x->text))
1048 scan_status_flags(x->text + sizeof("status:"), &result->hdrs);
1049 else if (match_string("x-status:", x->text))
1050 scan_status_flags(x->text + sizeof("x-status:"), &result->hdrs);
1051 }
1052 /*}}}*/
1053
1054 /* Process body */
1055 body_len = length - (body_start - data);
1056 do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, &result->atts, error);
1057
1058 /* Free header memory */
1059 for (x=header.next; x!=&header; x=nx) {
1060 nx = x->next;
1061 free(x->text);
1062 free(x);
1063 }
1064
1065 if (ct_nvp) free_nvp(ct_nvp);
1066 if (cte_nvp) free_nvp(cte_nvp);
1067 if (cd_nvp) free_nvp(cd_nvp);
1068
1069 return result;
1070
1071 }
1072 /*}}}*/
1073
/* How the buffer most recently produced by create_ro_mapping() was obtained.
 * free_ro_mapping() looks at data_alloc_type to decide how to release it.
 */
#define ALLOC_NONE   1   /* nothing for free_ro_mapping() to release */
#define ALLOC_MMAP   2   /* released with munmap() */
#define ALLOC_MALLOC 3   /* released with free() */

int data_alloc_type;
1079
1080 #if USE_GZIP_MBOX || USE_BZIP_MBOX
1081
/* Size in bytes of the scratch buffer (and of each read request) used by
 * create_ro_mapping() while decompressing a file. */
#define SIZE_STEP (8 * 1024 * 1024)

/* Results of get_compression_type(); also stored in struct zFile.type. */
#define COMPRESSION_NONE 0
#define COMPRESSION_GZIP 1
#define COMPRESSION_BZIP 2
1087
get_compression_type(const char * filename)1088 static int get_compression_type(const char *filename) {/*{{{*/
1089 size_t len = strlen(filename);
1090 int ptr;
1091
1092 #ifdef USE_GZIP_MBOX
1093 ptr = len - 3;
1094 if (len > 3 && strncasecmp(filename + ptr, ".gz", 3) == 0) {
1095 return COMPRESSION_GZIP;
1096 }
1097 #endif
1098
1099 #ifdef USE_BZIP_MBOX
1100 ptr = len - 4;
1101 if (len > 3 && strncasecmp(filename + ptr, ".bz2", 4) == 0) {
1102 return COMPRESSION_BZIP;
1103 }
1104 #endif
1105
1106 return COMPRESSION_NONE;
1107 }
1108 /*}}}*/
1109
is_compressed(const char * filename)1110 static int is_compressed(const char *filename) {/*{{{*/
1111 return (get_compression_type(filename) != COMPRESSION_NONE);
1112 }
1113 /*}}}*/
1114
/* A compressed input stream opened by xx_zopen(). */
struct zFile {/*{{{*/
  /* Handle belonging to whichever library `type` selects.  zptr overlays
   * the library-specific members so that xx_zopen() can test for a failed
   * open without caring which member was filled in; this relies on both
   * library handles being ordinary pointers.
   */
  union {
#ifdef USE_GZIP_MBOX
    gzFile gzf;
#endif
#ifdef USE_BZIP_MBOX
    BZFILE *bzf;
#endif
    void *zptr;
  } foo;
  int type;   /* COMPRESSION_* value chosen by get_compression_type() */
};
/*}}}*/
1131
xx_zopen(const char * filename,const char * mode)1132 static struct zFile * xx_zopen(const char *filename, const char *mode) {/*{{{*/
1133 struct zFile *zf = new(struct zFile);
1134
1135 zf->type = get_compression_type(filename);
1136 switch (zf->type) {
1137 #ifdef USE_GZIP_MBOX
1138 case COMPRESSION_GZIP:
1139 zf->foo.gzf = gzopen(filename, "rb");
1140 break;
1141 #endif
1142 #ifdef USE_BZIP_MBOX
1143 case COMPRESSION_BZIP:
1144 zf->foo.bzf = BZ2_bzopen(filename, "rb");
1145 break;
1146 #endif
1147 default:
1148 zf->foo.zptr = NULL;
1149 break;
1150 }
1151
1152 if (!zf->foo.zptr) {
1153 free(zf);
1154 return 0;
1155 }
1156
1157 return zf;
1158 }
1159 /*}}}*/
xx_zclose(struct zFile * zf)1160 static void xx_zclose(struct zFile *zf) {/*{{{*/
1161 switch (zf->type) {
1162 #ifdef USE_GZIP_MBOX
1163 case COMPRESSION_GZIP:
1164 gzclose(zf->foo.gzf);
1165 break;
1166 #endif
1167 #ifdef USE_BZIP_MBOX
1168 case COMPRESSION_BZIP:
1169 BZ2_bzclose(zf->foo.bzf);
1170 break;
1171 #endif
1172 default:
1173 zf->foo.zptr = NULL;
1174 break;
1175 }
1176 free(zf);
1177 }
1178 /*}}}*/
xx_zread(struct zFile * zf,void * buf,int len)1179 static int xx_zread(struct zFile *zf, void *buf, int len) {/*{{{*/
1180 switch (zf->type) {
1181 #ifdef USE_GZIP_MBOX
1182 case COMPRESSION_GZIP:
1183 return gzread(zf->foo.gzf, buf, len);
1184 break;
1185 #endif
1186 #ifdef USE_BZIP_MBOX
1187 case COMPRESSION_BZIP:
1188 return BZ2_bzread(zf->foo.bzf, buf, len);
1189 break;
1190 #endif
1191 default:
1192 return 0;
1193 break;
1194 }
1195 }
1196 /*}}}*/
1197 #endif
1198
1199 #if USE_GZIP_MBOX || USE_BZIP_MBOX
/* Number of decompressed files kept mapped at once.  Is more than one ever
 * needed?  The code below copes with any value here. */
#define ROCACHE_SIZE 1

/* One cached, decompressed file. */
struct ro_mapping {
  char *filename;       /* name the entry was added under (copy made by new_string()) */
  unsigned char *map;   /* read-only mapping of the data; NULL marks an unused slot */
  size_t len;           /* length of the mapping in bytes */
};

/* Becomes 1 once add_ro_cache() has zeroed the table below. */
static int ro_cache_init = 0;
static struct ro_mapping ro_mapping_cache[ROCACHE_SIZE];
1209
1210 /* find a temp file in the mapping cache. If nothing is found lasti is
1211 * set to the next slot to use for insertion. You have to check that slot
1212 * to see if it is currently in use
1213 */
find_ro_cache(const char * filename,int * lasti)1214 static struct ro_mapping *find_ro_cache(const char *filename, int *lasti)
1215 {
1216 int i = 0;
1217 struct ro_mapping *ro = NULL;
1218 if (lasti)
1219 *lasti = 0;
1220 if (!ro_cache_init)
1221 return NULL;
1222 for (i = 0 ; i < ROCACHE_SIZE ; i++) {
1223 ro = ro_mapping_cache + i;
1224 if (!ro->map) {
1225 if (lasti)
1226 *lasti = i;
1227 return NULL;
1228 }
1229 if (strcmp(filename, ro->filename) == 0)
1230 return ro;
1231 }
1232 /* if we're here, the map is full. They will reuse slot 0 */
1233 return NULL;
1234 }
1235
1236 /*
1237 * put a new tempfile into the cache. It is mmaped as part of this function
1238 * so you can safely close the file handle after calling this.
1239 */
add_ro_cache(const char * filename,int fd,size_t len)1240 static struct ro_mapping *add_ro_cache(const char *filename, int fd, size_t len)
1241 {
1242 int i = 0;
1243 struct ro_mapping *ro = NULL;
1244 if (!ro_cache_init) {
1245 memset(&ro_mapping_cache, 0, sizeof(ro_mapping_cache));
1246 ro_cache_init = 1;
1247 }
1248 ro = find_ro_cache(filename, &i);
1249 if (ro) {
1250 fprintf(stderr, "%s already in ro cache\n", filename);
1251 return NULL;
1252 }
1253 ro = ro_mapping_cache + i;
1254 if (ro->map) {
1255 munmap(ro->map, ro->len);
1256 ro->map = NULL;
1257 free(ro->filename);
1258 }
1259 ro->map = (unsigned char *)mmap(0, len, PROT_READ, MAP_SHARED, fd, 0);
1260 if (ro->map == MAP_FAILED) {
1261 ro->map = NULL;
1262 perror("rfc822:mmap");
1263 return NULL;
1264 }
1265 ro->len = len;
1266 ro->filename = new_string(filename);
1267 return ro;
1268 }
1269 #endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */
1270
create_ro_mapping(const char * filename,unsigned char ** data,int * len)1271 void create_ro_mapping(const char *filename, unsigned char **data, int *len)/*{{{*/
1272 {
1273 struct stat sb;
1274 int fd;
1275
1276 #if USE_GZIP_MBOX || USE_BZIP_MBOX
1277 struct zFile *zf;
1278 #endif
1279
1280 if (stat(filename, &sb) < 0)
1281 {
1282 report_error("stat", filename);
1283 *data = NULL;
1284 return;
1285 }
1286
1287 #if USE_GZIP_MBOX || USE_BZIP_MBOX
1288 if(is_compressed(filename)) {
1289 unsigned char *p;
1290 size_t cur_read;
1291 struct ro_mapping *ro;
1292 FILE *tmpf;
1293
1294 /* this branch never returns things that are freeable */
1295 data_alloc_type = ALLOC_NONE;
1296 ro = find_ro_cache(filename, NULL);
1297 if (ro) {
1298 *data = ro->map;
1299 *len = ro->len;
1300 return;
1301 }
1302
1303 if(verbose) {
1304 fprintf(stderr, "Decompressing %s...\n", filename);
1305 }
1306
1307 tmpf = tmpfile();
1308 if (!tmpf) {
1309 perror("tmpfile");
1310 goto comp_error;
1311 }
1312 zf = xx_zopen(filename, "rb");
1313 if (!zf) {
1314 fprintf(stderr, "Could not open %s\n", filename);
1315 goto comp_error;
1316 }
1317 p = new_array(unsigned char, SIZE_STEP);
1318 cur_read = xx_zread(zf, p, SIZE_STEP);
1319 if (fwrite(p, cur_read, 1, tmpf) != 1) {
1320 fprintf(stderr, "failed writing to temp file for %s\n", filename);
1321 goto comp_error;
1322 }
1323 *len = cur_read;
1324 if (cur_read >= SIZE_STEP) {
1325 while(1) {
1326 int ret;
1327 cur_read = xx_zread(zf, p, SIZE_STEP);
1328 if (cur_read <= 0)
1329 break;
1330 *len += cur_read;
1331 ret = fwrite(p, cur_read, 1, tmpf);
1332 if (ret != 1) {
1333 fprintf(stderr, "failed writing to temp file for %s\n", filename);
1334 goto comp_error;
1335 }
1336 }
1337 }
1338 free(p);
1339 xx_zclose(zf);
1340
1341 if(*len > 0) {
1342 ro = add_ro_cache(filename, fileno(tmpf), *len);
1343 if (!ro)
1344 goto comp_error;
1345 *data = ro->map;
1346 *len = ro->len;
1347 } else {
1348 *data = NULL;
1349 }
1350 fclose(tmpf);
1351 return;
1352
1353 comp_error:
1354 *data = NULL;
1355 *len = 0;
1356 if (tmpf)
1357 fclose(tmpf);
1358 return;
1359 }
1360 #endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */
1361
1362 *len = sb.st_size;
1363 if (*len == 0) {
1364 *data = NULL;
1365 return;
1366 }
1367
1368 if (!S_ISREG(sb.st_mode)) {
1369 *data = NULL;
1370 return;
1371 }
1372
1373 fd = open(filename, O_RDONLY);
1374 if (fd < 0)
1375 {
1376 report_error("open", filename);
1377 *data = NULL;
1378 return;
1379 }
1380
1381 *data = (unsigned char *) mmap(0, *len, PROT_READ, MAP_SHARED, fd, 0);
1382 if (close(fd) < 0)
1383 report_error("close", filename);
1384 if (*data == MAP_FAILED) {
1385 report_error("rfc822:mmap", filename);
1386 *data = NULL;
1387 return;
1388 }
1389 data_alloc_type = ALLOC_MMAP;
1390 }
1391 /*}}}*/
free_ro_mapping(unsigned char * data,int len)1392 void free_ro_mapping(unsigned char *data, int len)/*{{{*/
1393 {
1394 int r;
1395
1396 if(data_alloc_type == ALLOC_MALLOC) {
1397 free(data);
1398 }
1399
1400 if(data_alloc_type == ALLOC_MMAP) {
1401 r = munmap(data, len);
1402 if(r < 0) {
1403 fprintf(stderr, "munmap() errord\n");
1404 exit(1);
1405 }
1406 }
1407 }
1408 /*}}}*/
1409
setup_msg_src(char * filename)1410 static struct msg_src *setup_msg_src(char *filename)/*{{{*/
1411 {
1412 static struct msg_src result;
1413 result.type = MS_FILE;
1414 result.filename = filename;
1415 return &result;
1416 }
1417 /*}}}*/
/* Parse the single message stored in FILENAME.
 *
 * Returns the value produced by data_to_rfc822() (release it with
 * free_rfc822()), or NULL when create_ro_mapping() yields no data, e.g.
 * for an empty or unreadable file.
 */
struct rfc822 *make_rfc822(char *filename)/*{{{*/
{
  unsigned char *data;
  int len;
  struct msg_src *src;
  struct rfc822 *msg;

  create_ro_mapping(filename, &data, &len);

  /* Don't process empty files */
  if (!data) {
    return NULL;
  }

  src = setup_msg_src(filename);
  /* For one message per file, ignore missing end boundary condition
   * (hence the NULL error pointer). */
  msg = data_to_rfc822(src, (char *) data, len, NULL);

  free_ro_mapping(data, len);
  return msg;
}
/*}}}*/
free_rfc822(struct rfc822 * msg)1443 void free_rfc822(struct rfc822 *msg)/*{{{*/
1444 {
1445 struct attachment *a, *na;
1446
1447 if (!msg) return;
1448
1449 if (msg->hdrs.to) free(msg->hdrs.to);
1450 if (msg->hdrs.cc) free(msg->hdrs.cc);
1451 if (msg->hdrs.from) free(msg->hdrs.from);
1452 if (msg->hdrs.subject) free(msg->hdrs.subject);
1453 if (msg->hdrs.message_id) free(msg->hdrs.message_id);
1454 if (msg->hdrs.in_reply_to) free(msg->hdrs.in_reply_to);
1455 if (msg->hdrs.references) free(msg->hdrs.references);
1456
1457 for (a = msg->atts.next; a != &msg->atts; a = na) {
1458 na = a->next;
1459 if (a->filename) free(a->filename);
1460 if (a->ct == CT_MESSAGE_RFC822) {
1461 free_rfc822(a->data.rfc822);
1462 } else {
1463 free(a->data.normal.bytes);
1464 }
1465 free(a);
1466 }
1467 free(msg);
1468 }
1469 /*}}}*/
1470
1471 #ifdef TEST
1472
/* Write INDENT spaces to stdout; nothing is written when INDENT <= 0. */
static void do_indent(int indent)/*{{{*/
{
  int col;

  for (col = 0; col < indent; col++) {
    putchar(' ');
  }
}
/*}}}*/
/* Print "TAG: X" on its own line, preceded by INDENT spaces.  Nothing is
 * printed when X is NULL. */
static void show_header(char *tag, char *x, int indent)/*{{{*/
{
  if (!x) {
    return;
  }

  do_indent(indent);
  printf("%s: %s\n", tag, x);
}
/*}}}*/
show_rfc822(struct rfc822 * msg,int indent)1489 static void show_rfc822(struct rfc822 *msg, int indent)/*{{{*/
1490 {
1491 struct attachment *a;
1492 show_header("From", msg->hdrs.from, indent);
1493 show_header("To", msg->hdrs.to, indent);
1494 show_header("Cc", msg->hdrs.cc, indent);
1495 show_header("Date", msg->hdrs.date, indent);
1496 show_header("Subject", msg->hdrs.subject, indent);
1497
1498 for (a = msg->atts.next; a != &msg->atts; a=a->next) {
1499 printf("========================\n");
1500 switch (a->ct) {
1501 case CT_TEXT_PLAIN: printf("Attachment type text/plain\n"); break;
1502 case CT_TEXT_HTML: printf("Attachment type text/html\n"); break;
1503 case CT_TEXT_OTHER: printf("Attachment type text/non-plain\n"); break;
1504 case CT_MESSAGE_RFC822: printf("Attachment type message/rfc822\n"); break;
1505 case CT_OTHER: printf("Attachment type other\n"); break;
1506 }
1507 if (a->ct != CT_MESSAGE_RFC822) {
1508 printf("%d bytes\n", a->data.normal.len);
1509 }
1510 if ((a->ct == CT_TEXT_PLAIN) || (a->ct == CT_TEXT_HTML) || (a->ct == CT_TEXT_OTHER)) {
1511 printf("----------\n");
1512 printf("%s\n", a->data.normal.bytes);
1513 }
1514 if (a->ct == CT_MESSAGE_RFC822) {
1515 show_rfc822(a->data.rfc822, indent + 4);
1516 }
1517 }
1518 }
1519 /*}}}*/
1520
/* Stand-alone test driver (built only with TEST defined): parse the
 * message file named by argv[1] and dump it with show_rfc822().
 *
 * Returns 0 on success and 1 when the file yields no message; a missing
 * argument is reported through unlock_and_exit(2).
 */
int main (int argc, char **argv)/*{{{*/
{
  struct rfc822 *msg;

  if (argc < 2) {
    fprintf(stderr, "Need a path\n");
    unlock_and_exit(2);
  }

  msg = make_rfc822(argv[1]);
  if (!msg) {
    /* make_rfc822() returns NULL for empty, unreadable or unparsable
     * files, and show_rfc822() would dereference it. */
    fprintf(stderr, "Could not parse %s\n", argv[1]);
    return 1;
  }

  show_rfc822(msg, 0);
  free_rfc822(msg);

  return 0;
}
/*}}}*/
1539 #endif /* TEST */
1540