1 /*
2   mairix - message index builder and finder for maildir folders.
3 
4  **********************************************************************
5  * Copyright (C) Richard P. Curnow  2002,2003,2004,2005,2006,2007,2010
6  * rfc2047 decode:
7  * Copyright (C) Mikael Ylikoski 2002
8  * gzip mbox support:
9  * Copyright (C) Ico Doornekamp 2005
10  * Copyright (C) Felipe Gustavo de Almeida 2005
11  * bzip2 mbox support:
12  * Copyright (C) Paramjit Oberoi 2005
13  * caching uncompressed mbox data:
14  * Copyright (C) Chris Mason 2006
15  * memory leak fixes:
16  * Copyright (C) Samuel Tardieu 2008
17  *
18  * This program is free software; you can redistribute it and/or modify
19  * it under the terms of version 2 of the GNU General Public License as
20  * published by the Free Software Foundation.
21  *
22  * This program is distributed in the hope that it will be useful, but
23  * WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License along
28  * with this program; if not, write to the Free Software Foundation, Inc.,
29  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30  *
31  **********************************************************************
32  */
33 
34 #include "mairix.h"
35 #include "nvp.h"
36 
37 #include <assert.h>
38 #include <ctype.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <sys/mman.h>
44 #ifdef USE_GZIP_MBOX
45 #  include <zlib.h>
46 #endif
47 #ifdef USE_BZIP_MBOX
48 #  include <bzlib.h>
49 #endif
50 
/* Minimal link header shared by every doubly-linked record in this file. */
struct DLL {/*{{{*/
  struct DLL *next;
  struct DLL *prev;
};
/*}}}*/
/* Append node 'x' at the tail of the circular list whose sentinel is 'head',
 * i.e. link it in immediately before the sentinel.
 *
 * Both arguments are void * so that any record whose first two members are
 * its 'next' and 'prev' links can be passed without a cast; they are viewed
 * through struct DLL here.  'head' must already be a well-formed circular
 * list (an empty list has head->next == head->prev == head). */
static void enqueue(void *head, void *x)/*{{{*/
{
  struct DLL *sentinel = head;
  struct DLL *node = x;
  struct DLL *old_tail = sentinel->prev;

  node->prev = old_tail;
  node->next = sentinel;
  old_tail->next = node;
  sentinel->prev = node;
}
/*}}}*/
69 
/* Content-transfer-encodings distinguished by decode_encoding_type() and
 * acted on by unencode_data(). */
enum encoding_type {/*{{{*/
  ENC_UNKNOWN,          /* a value was supplied but not recognised */
  ENC_NONE,             /* no encoding string supplied at all */
  ENC_BINARY,           /* copied through unchanged */
  ENC_7BIT,             /* copied through unchanged */
  ENC_8BIT,             /* copied through unchanged */
  ENC_QUOTED_PRINTABLE, /* "=XX" escapes and soft line breaks are decoded */
  ENC_BASE64,           /* base64 decoded */
  ENC_UUENCODE          /* "x-uuencode": uudecoded */
};
/*}}}*/
/* The parts of a Content-Type header that this file acts on, as filled in by
 * parse_content_type().  The strings are whatever the nvp lookup functions
 * return; nothing in this file frees them separately from the nvp. */
struct content_type_header {/*{{{*/
  const char *major; /* e.g. text */
  const char *minor; /* e.g. plain */
  const char *boundary; /* for multipart */
  /* charset? */
};
/*}}}*/
/* One header line held in a circular doubly-linked list.  'next' and 'prev'
 * must stay as the first two members so that enqueue() can treat the record
 * as a struct DLL. */
struct line {/*{{{*/
  struct line *next;
  struct line *prev;
  char *text; /* heap-allocated, NUL-terminated copy of the line (no newline) */
};
/*}}}*/
94 
init_headers(struct headers * hdrs)95 static void init_headers(struct headers *hdrs)/*{{{*/
96 {
97   hdrs->to = NULL;
98   hdrs->cc = NULL;
99   hdrs->from = NULL;
100   hdrs->subject = NULL;
101   hdrs->message_id = NULL;
102   hdrs->in_reply_to = NULL;
103   hdrs->references = NULL;
104   hdrs->date = 0;
105   hdrs->flags.seen = 0;
106   hdrs->flags.replied = 0;
107   hdrs->flags.flagged = 0;
108 };
109 /*}}}*/
splice_header_lines(struct line * header)110 static void splice_header_lines(struct line *header)/*{{{*/
111 {
112   /* Deal with newline then tab in header */
113   struct line *x, *next;
114   for (x=header->next; x!=header; x=next) {
115 #if 0
116     printf("next header, x->text=%08lx\n", x->text);
117     printf("header=<%s>\n", x->text);
118 #endif
119     next = x->next;
120     if (isspace(x->text[0] & 0xff)) {
121       /* Glue to previous line */
122       char *p, *newbuf, *oldbuf;
123       struct line *y;
124       for (p=x->text; *p; p++) {
125         if (!isspace(*(unsigned char *)p)) break;
126       }
127       p--; /* point to final space */
128       y = x->prev;
129 #if 0
130       printf("y=%08lx p=%08lx\n", y->text, p);
131 #endif
132       newbuf = new_array(char, strlen(y->text) + strlen(p) + 1);
133       strcpy(newbuf, y->text);
134       strcat(newbuf, p);
135       oldbuf = y->text;
136       y->text = newbuf;
137       free(oldbuf);
138       y->next = x->next;
139       x->next->prev = y;
140       free(x->text);
141       free(x);
142     }
143   }
144   return;
145 }
146 /*}}}*/
audit_header(struct line * header)147 static int audit_header(struct line *header)/*{{{*/
148 {
149   /* Check for obvious broken-ness
150    * 1st line has no leading spaces, single word then colon
151    * following lines have leading spaces or single word followed by colon
152    * */
153   struct line *x;
154   int first=1;
155   int count=1;
156   for (x=header->next; x!=header; x=x->next) {
157     int has_leading_space=0;
158     int is_blank;
159     int has_word_colon=0;
160 
161     if (1 || first) {
162       /* Ignore any UUCP or mbox style From line at the start */
163       if (!strncmp("From ", x->text, 5)) {
164         continue;
165       }
166       /* Ignore escaped From line at the start */
167       if (!strncmp(">From ", x->text, 6)) {
168         continue;
169       }
170     }
171 
172     is_blank = !(x->text[0]);
173     if (!is_blank) {
174       char *p;
175       int saw_char = 0;
176       has_leading_space = isspace(x->text[0] & 0xff);
177       has_word_colon = 0; /* default */
178       p = x->text;
179       while(*p) {
180         if(*p == ':') {
181           has_word_colon = saw_char;
182           break;
183         } else if (isspace(*(unsigned char *) p)) {
184           has_word_colon = 0;
185           break;
186         } else {
187           saw_char = 1;
188         }
189         p++;
190       }
191     }
192 
193     if (( first && (is_blank || has_leading_space || !has_word_colon)) ||
194         (!first && (is_blank || !(has_leading_space || has_word_colon)))) {
195 #if 0
196       fprintf(stderr, "Header line %d <%s> fails because:", count, x->text);
197       if (first && is_blank) { fprintf(stderr, " [first && is_blank]"); }
198       if (first && has_leading_space) { fprintf(stderr, " [first && has_leading_space]"); }
199       if (first && !has_word_colon) { fprintf(stderr, " [first && !has_word_colon]"); }
200       if (!first && is_blank) { fprintf(stderr, " [!first && is_blank]"); }
201       if (!first && !(has_leading_space||has_word_colon)) { fprintf(stderr, " [!first && !has_leading_space||has_word_colon]"); }
202       fprintf(stderr, "\n");
203 #endif
204       /* Header fails the audit */
205       return 0;
206     }
207     first = 0;
208     count++;
209   }
210   /* If we get here the header must have been OK */
211   return 1;
212 }/*}}}*/
/* Case-insensitive prefix test: returns 1 when 'candidate' begins with the
 * whole of 'ref', 0 otherwise.  An empty 'ref' matches anything. */
static int match_string(const char *ref, const char *candidate)/*{{{*/
{
  return strncasecmp(ref, candidate, strlen(ref)) == 0;
}
/*}}}*/
219 
/* 256-entry lookup indexed by unsigned byte value: 1 for '=' (0x3d), 0 for
 * everything else.  The base64 decoders add equal_table[c] for each input
 * byte to count padding characters. */
static char equal_table[256] = {/*{{{*/
  [0x3d] = 1
};
/*}}}*/
/* 256-entry lookup indexed by unsigned byte value giving the 6-bit value of
 * each base64 alphabet character ('A'-'Z' = 0-25, 'a'-'z' = 26-51,
 * '0'-'9' = 52-61, '+' = 62, '/' = 63).  Bytes outside the alphabet map to
 * -1, which the decoders skip.  The padding character '=' (0x3d) maps to 0
 * rather than -1 so that it still fills a slot in the 4-character group;
 * equal_table[] is what tells the decoders how many output bytes to drop. */
static int base64_table[] = {/*{{{*/
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* 00-0f */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* 10-1f */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  /* 20-2f */
   52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,   0,  -1,  -1,  /* 30-3f */
   -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  /* 40-4f */
   15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,  /* 50-5f */
   -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  /* 60-6f */
   41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,  /* 70-7f */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* 80-8f */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* 90-9f */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* a0-af */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* b0-bf */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* c0-cf */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* d0-df */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /* e0-ef */
   -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1   /* f0-ff */
};
/*}}}*/
/* Convert one hexadecimal digit character (either case) to its value 0-15.
 * Any character that is not a hex digit yields 0; there is no error
 * indication. */
static int hex_to_val(char x) {/*{{{*/
  if (x >= '0' && x <= '9') return x - '0';
  if (x >= 'a' && x <= 'f') return 10 + (x - 'a');
  if (x >= 'A' && x <= 'F') return 10 + (x - 'A');
  return 0;
}
/*}}}*/
decode_header_value(char * text)293 static void decode_header_value(char *text){/*{{{*/
294   /* rfc2047 decode, written by Mikael Ylikoski */
295 
296   char *s, *a, *b, *e, *p, *q;
297 
298   for (p = q = s = text; (s = strstr(s, "=?")); s = e + 2) {
299     if (p == q)
300       p = q = s;
301     else
302       while (q != s)
303         *p++ = *q++;
304     s += 2;
305     a = strchr(s, '?');
306     if (!a) break;
307     a++;
308     b = strchr(a, '?');
309     if (!b) break;
310     b++;
311     e = strstr(b, "?=");
312     if (!e) break;
313     /* have found an encoded-word */
314     if (b - a != 2)
315       continue; /* unknown encoding */
316     if (*a == 'q' || *a == 'Q') {
317       int val;
318       q = b;
319       while (q < e) {
320         if (*q == '_') {
321           *p++ = 0x20;
322           q++;
323         } else if (*q == '=') {
324           q++;
325           val = hex_to_val(*q++) << 4;
326           val += hex_to_val(*q++);
327           *p++ = val;
328         } else
329           *p++ = *q++;
330       }
331     } else if (*a == 'b' || *a == 'B') {
332       int reg, nc, eq; /* register, #characters in reg, #equals */
333       int dc; /* decoded character */
334       eq = reg = nc = 0;
335       for (q = b; q < e; q++) {
336         unsigned char cq = *(unsigned char *)q;
337         dc = base64_table[cq];
338         eq += equal_table[cq];
339 
340         if (dc >= 0) {
341           reg <<= 6;
342           reg += dc;
343           nc++;
344           if (nc == 4) {
345             *p++ = ((reg >> 16) & 0xff);
346             if (eq < 2) *p++ = ((reg >> 8) & 0xff);
347             if (eq < 1) *p++ = reg & 0xff;
348             nc = reg = 0;
349             if (eq) break;
350           }
351         }
352       }
353     } else {
354       continue; /* unknown encoding */
355     }
356     q = e + 2;
357   }
358   if (p == q) return;
359   while (*q != '\0')
360     *p++ = *q++;
361   *p = '\0';
362 }
363 /*}}}*/
/* Return a newly allocated copy of everything after the first ':' in 'text',
 * with RFC 2047 encoded-words decoded.  Returns NULL when 'text' contains no
 * ':'.  The caller owns the returned string. */
static char *copy_header_value(char *text){/*{{{*/
  char *colon = strchr(text, ':');
  char *value;

  if (!colon) return NULL;

  value = new_string(colon + 1);
  decode_header_value(value);
  return value;
}
/*}}}*/
/* Store the decoded value of header line 'text' in '*previous'.
 *
 * If '*previous' is NULL it simply takes ownership of the new value.
 * Otherwise the new value is appended to the existing one, separated by
 * ", ", so that repeated headers accumulate into one string.
 *
 * If 'text' has no ':' (copy_header_value() returns NULL) '*previous' is left
 * untouched rather than handing a NULL pointer to extend_string(). */
static void copy_or_concat_header_value(char **previous, char *text){/*{{{*/
  char *p = copy_header_value(text);
  if (!p)
    return;
  if (*previous)
  {
    *previous = extend_string(*previous, ", ");
    *previous = extend_string(*previous, p);
    free(p);
  }
  else
    *previous = p;
}
/*}}}*/
decode_encoding_type(const char * e)386 static enum encoding_type decode_encoding_type(const char *e)/*{{{*/
387 {
388   enum encoding_type result;
389   const char *p;
390   if (!e) {
391     result = ENC_NONE;
392   } else {
393     for (p=e; *p && isspace(*(unsigned char *)p); p++) ;
394     if (   match_string("7bit", p)
395         || match_string("7-bit", p)
396         || match_string("7 bit", p)) {
397       result = ENC_7BIT;
398     } else if (match_string("8bit", p)
399             || match_string("8-bit", p)
400             || match_string("8 bit", p)) {
401       result = ENC_8BIT;
402     } else if (match_string("quoted-printable", p)) {
403       result = ENC_QUOTED_PRINTABLE;
404     } else if (match_string("base64", p)) {
405       result = ENC_BASE64;
406     } else if (match_string("binary", p)) {
407       result = ENC_BINARY;
408     } else if (match_string("x-uuencode", p)) {
409       result = ENC_UUENCODE;
410     } else {
411       fprintf(stderr, "Warning: unknown encoding type: '%s'\n", e);
412       result = ENC_UNKNOWN;
413     }
414   }
415   return result;
416 }
417 /*}}}*/
parse_content_type(struct nvp * ct_nvp,struct content_type_header * result)418 static void parse_content_type(struct nvp *ct_nvp, struct content_type_header *result)/*{{{*/
419 {
420   result->major = NULL;
421   result->minor = NULL;
422   result->boundary = NULL;
423 
424   result->major = nvp_major(ct_nvp);
425   if (result->major) {
426     result->minor = nvp_minor(ct_nvp);
427   } else {
428     result->minor = NULL;
429     result->major = nvp_first(ct_nvp);
430   }
431 
432   result->boundary = nvp_lookupcase(ct_nvp, "boundary");
433 }
434 
435 /*}}}*/
/* If 'start' points at zero or more whitespace characters followed by a
 * newline, return a pointer to that newline; otherwise return NULL.
 *
 * The scan stops at the first byte that is neither whitespace nor '\n', which
 * includes the NUL terminator, so the buffer must contain such a byte or a
 * newline at or after 'start'. */
static char *looking_at_ws_then_newline(char *start)/*{{{*/
{
  char *cursor;

  for (cursor = start; *cursor != '\n'; cursor++) {
    if (!isspace(*(unsigned char *) cursor)) {
      return NULL;
    }
  }
  return cursor;
}
/*}}}*/
450 
unencode_data(struct msg_src * src,char * input,int input_len,const char * enc,int * output_len)451 static char *unencode_data(struct msg_src *src, char *input, int input_len, const char *enc, int *output_len)/*{{{*/
452 {
453   enum encoding_type encoding;
454   char *result, *end_result;
455   char *end_input;
456 
457   encoding = decode_encoding_type(enc);
458   end_input = input + input_len;
459 
460   /* All mime encodings result in expanded data, so this is guaranteed to
461    * safely oversize the output array */
462   result = new_array(char, input_len + 1);
463 
464   /* Now decode */
465   switch (encoding) {
466     case ENC_7BIT:/*{{{*/
467     case ENC_8BIT:
468     case ENC_BINARY:
469     case ENC_NONE:
470       {
471         memcpy(result, input, input_len);
472         end_result = result + input_len;
473       }
474       break;
475 /*}}}*/
476     case ENC_QUOTED_PRINTABLE:/*{{{*/
477       {
478         char *p, *q;
479         p = result;
480         for (p=result, q=input;
481              q<end_input; ) {
482 
483           if (*q == '=') {
484             /* followed by optional whitespace then \n?  discard them. */
485             char *r;
486             int val;
487             q++;
488             r = looking_at_ws_then_newline(q);
489             if (r) {
490               q = r + 1; /* Point into next line */
491               continue;
492             }
493             /* not that case. */
494             val =  hex_to_val(*q++) << 4;
495             val += hex_to_val(*q++);
496             *p++ = val;
497 
498           } else {
499             /* Normal character */
500             *p++ = *q++;
501           }
502         }
503         end_result = p;
504       }
505       break;
506 /*}}}*/
507     case ENC_BASE64:/*{{{*/
508       {
509         char *p, *q;
510         int reg, nc, eq; /* register, #characters in reg, #equals */
511         int dc; /* decoded character */
512         eq = reg = nc = 0;
513         for (q=input, p=result; q<end_input; q++) {
514           unsigned char cq =  * (unsigned char *)q;
515           /* Might want a 256 entry array instead of this sub-optimal mess
516            * eventually. */
517           dc = base64_table[cq];
518           eq += equal_table[cq];
519 
520           if (dc >= 0) {
521             reg <<= 6;
522             reg += dc;
523             nc++;
524             if (nc == 4) {
525               *p++ = ((reg >> 16) & 0xff);
526               if (eq < 2) *p++ = ((reg >> 8) & 0xff);
527               if (eq < 1) *p++ = reg & 0xff;
528               nc = reg = 0;
529               if (eq) goto done_base_64;
530             }
531           }
532         }
533       done_base_64:
534         end_result = p;
535       }
536       break;
537         /*}}}*/
538     case ENC_UUENCODE:/*{{{*/
539       {
540         char *p, *q;
541         /* Find 'begin ' */
542         for (q = input; q < end_input - 6 && memcmp(q, "begin ", 6); q++)
543           ;
544         q += 6;
545         /* skip to EOL */
546         while (q < end_input && *q != '\n')
547           q++;
548         p = result;
549         while (q < end_input) { /* process line */
550 #define DEC(c) (((c) - ' ') & 077)
551           int len = DEC(*q++);
552           if (len == 0)
553             break;
554           for (; len > 0; q += 4, len -= 3) {
555             if (len >= 3) {
556               *p++ = DEC(q[0]) << 2 | DEC(q[1]) >> 4;
557               *p++ = DEC(q[1]) << 4 | DEC(q[2]) >> 2;
558               *p++ = DEC(q[2]) << 6 | DEC(q[3]);
559             } else {
560               if (len >= 1)
561                 *p++ = DEC(q[0]) << 2 | DEC(q[1]) >> 4;
562               if (len >= 2)
563                 *p++ = DEC(q[1]) << 4 | DEC(q[2]) >> 2;
564             }
565           }
566           while (q < end_input && *q != '\n')
567             q++;
568         }
569         end_result = p;
570       }
571       break;
572         /*}}}*/
573     case ENC_UNKNOWN:/*{{{*/
574       fprintf(stderr, "Unknown encoding type in %s\n", format_msg_src(src));
575       /* fall through - ignore this data */
576     /*}}}*/
577     default:/*{{{*/
578       end_result = result;
579       break;
580       /*}}}*/
581   }
582   *output_len = end_result - result;
583   result[*output_len] = '\0'; /* for convenience with text/plain etc to make it printable */
584   return result;
585 }
586 /*}}}*/
format_msg_src(struct msg_src * src)587 char *format_msg_src(struct msg_src *src)/*{{{*/
588 {
589   static char *buffer = NULL;
590   static int buffer_len = 0;
591   char *result;
592   int len;
593   switch (src->type) {
594     case MS_FILE:
595       result = src->filename;
596       break;
597     case MS_MBOX:
598       len = strlen(src->filename);
599       len += 32;
600       if (!buffer || (len > buffer_len)) {
601         free(buffer);
602         buffer = new_array(char, len);
603         buffer_len = len;
604       }
605       sprintf(buffer, "%s[%d,%d)", src->filename,
606           (int) src->start, (int) (src->start + src->len));
607       result = buffer;
608       break;
609     default:
610       result = NULL;
611       break;
612   }
613   return result;
614 }
615 /*}}}*/
split_and_splice_header(struct msg_src * src,char * data,struct line * header,char ** body_start)616 static int split_and_splice_header(struct msg_src *src, char *data, struct line *header, char **body_start)/*{{{*/
617 {
618   char *sol, *eol;
619   int blank_line;
620   header->next = header->prev = header;
621   sol = data;
622   do {
623     if (!*sol) break;
624     blank_line = 1; /* until proven otherwise */
625     eol = sol;
626     while (*eol && (*eol != '\n')) {
627       if (!isspace(*(unsigned char *) eol)) blank_line = 0;
628       eol++;
629     }
630     if (*eol == '\n') {
631       if (!blank_line) {
632         int line_length = eol - sol;
633         char *line_text = new_array(char, 1 + line_length);
634         struct line *new_header;
635 
636         strncpy(line_text, sol, line_length);
637         line_text[line_length] = '\0';
638         new_header = new(struct line);
639         new_header->text = line_text;
640         enqueue(header, new_header);
641       }
642       sol = eol + 1; /* Start of next line */
643     } else { /* must be null char */
644       fprintf(stderr, "Got null character whilst processing header of %s\n",
645           format_msg_src(src));
646       return -1; /* & leak memory */
647     }
648   } while (!blank_line);
649 
650   *body_start = sol;
651 
652   if (audit_header(header)) {
653     splice_header_lines(header);
654     return 0;
655   } else {
656 #if 0
657     /* Caller generates message */
658     fprintf(stderr, "Message had bad rfc822 headers, ignoring\n");
659 #endif
660     return -1;
661   }
662 }
663 /*}}}*/
664 
/* Forward prototypes */
/* do_multipart() is declared ahead of its definition because do_body() calls
 * it before that definition appears in this file. */
static void do_multipart(struct msg_src *src, char *input, int input_len,
    const char *boundary, struct attachment *atts,
    enum data_to_rfc822_error *error);
669 
670 /*{{{ do_body() */
do_body(struct msg_src * src,char * body_start,int body_len,struct nvp * ct_nvp,struct nvp * cte_nvp,struct nvp * cd_nvp,struct attachment * atts,enum data_to_rfc822_error * error)671 static void do_body(struct msg_src *src,
672     char *body_start, int body_len,
673     struct nvp *ct_nvp, struct nvp *cte_nvp,
674     struct nvp *cd_nvp,
675     struct attachment *atts,
676     enum data_to_rfc822_error *error)
677 {
678   char *decoded_body;
679   int decoded_body_len;
680   const char *content_transfer_encoding;
681   content_transfer_encoding = NULL;
682   if (cte_nvp) {
683     content_transfer_encoding = nvp_first(cte_nvp);
684     if (!content_transfer_encoding) {
685       fprintf(stderr, "Giving up on %s, content_transfer_encoding header not parseable\n",
686           format_msg_src(src));
687       return;
688     }
689   }
690 
691   decoded_body = unencode_data(src, body_start, body_len, content_transfer_encoding, &decoded_body_len);
692 
693   if (ct_nvp) {
694     struct content_type_header ct;
695     parse_content_type(ct_nvp, &ct);
696     if (ct.major && !strcasecmp(ct.major, "multipart")) {
697       do_multipart(src, decoded_body, decoded_body_len, ct.boundary, atts, error);
698       /* Don't need decoded body any longer - copies have been taken if
699        * required when handling multipart attachments. */
700       free(decoded_body);
701       if (error && (*error == DTR8_MISSING_END)) return;
702     } else {
703       /* unipart */
704       struct attachment *new_att;
705       const char *disposition;
706       new_att = new(struct attachment);
707       disposition = cd_nvp ? nvp_first(cd_nvp) : NULL;
708       if (disposition && !strcasecmp(disposition, "attachment")) {
709         const char *lookup;
710         lookup = nvp_lookupcase(cd_nvp, "filename");
711         if (lookup) {
712           new_att->filename = new_string(lookup);
713         } else {
714           /* Some messages have name=... in content-type: instead of
715            * filename=... in content-disposition. */
716           lookup = nvp_lookup(ct_nvp, "name");
717           if (lookup) {
718             new_att->filename = new_string(lookup);
719           } else {
720             new_att->filename = NULL;
721           }
722         }
723       } else {
724         new_att->filename = NULL;
725       }
726       if (ct.major && !strcasecmp(ct.major, "text")) {
727         if (ct.minor && !strcasecmp(ct.minor, "plain")) {
728           new_att->ct = CT_TEXT_PLAIN;
729         } else if (ct.minor && !strcasecmp(ct.minor, "html")) {
730           new_att->ct = CT_TEXT_HTML;
731         } else {
732           new_att->ct = CT_TEXT_OTHER;
733         }
734       } else if (ct.major && !strcasecmp(ct.major, "message") &&
735                  ct.minor && !strcasecmp(ct.minor, "rfc822")) {
736         new_att->ct = CT_MESSAGE_RFC822;
737       } else {
738         new_att->ct = CT_OTHER;
739       }
740 
741       if (new_att->ct == CT_MESSAGE_RFC822) {
742         new_att->data.rfc822 = data_to_rfc822(src, decoded_body, decoded_body_len, error);
743         free(decoded_body); /* data no longer needed */
744       } else {
745         new_att->data.normal.len = decoded_body_len;
746         new_att->data.normal.bytes = decoded_body;
747       }
748       enqueue(atts, new_att);
749     }
750   } else {
751     /* Treat as text/plain {{{*/
752     struct attachment *new_att;
753     new_att = new(struct attachment);
754     new_att->filename = NULL;
755     new_att->ct = CT_TEXT_PLAIN;
756     new_att->data.normal.len = decoded_body_len;
757     /* Add null termination on the end */
758     new_att->data.normal.bytes = new_array(char, decoded_body_len + 1);
759     memcpy(new_att->data.normal.bytes, decoded_body, decoded_body_len + 1);
760     free(decoded_body);
761     enqueue(atts, new_att);/*}}}*/
762   }
763 }
764 /*}}}*/
765 /*{{{ do_attachment() */
do_attachment(struct msg_src * src,char * start,char * after_end,struct attachment * atts)766 static void do_attachment(struct msg_src *src,
767     char *start, char *after_end,
768     struct attachment *atts)
769 {
770   /* decode attachment and add to attachment list */
771   struct line header, *x, *nx;
772   char *body_start;
773   int body_len;
774 
775   struct nvp *ct_nvp, *cte_nvp, *cd_nvp, *nvp;
776 
777   if (split_and_splice_header(src, start, &header, &body_start) < 0) {
778     fprintf(stderr, "Giving up on attachment with bad header in %s\n",
779         format_msg_src(src));
780     return;
781   }
782 
783   /* Extract key headers */
784   ct_nvp = cte_nvp = cd_nvp = NULL;
785   for (x=header.next; x!=&header; x=x->next) {
786     if ((nvp = make_nvp(src, x->text, "content-type:"))) {
787       ct_nvp = nvp;
788     } else if ((nvp = make_nvp(src, x->text, "content-transfer-encoding:"))) {
789       cte_nvp = nvp;
790     } else if ((nvp = make_nvp(src, x->text, "content-disposition:"))) {
791       cd_nvp = nvp;
792     }
793   }
794 
795 #if 0
796   if (ct_nvp) {
797     fprintf(stderr, "======\n");
798     fprintf(stderr, "Dump of content-type hdr\n");
799     nvp_dump(ct_nvp, stderr);
800     free(ct_nvp);
801   }
802 
803   if (cte_nvp) {
804     fprintf(stderr, "======\n");
805     fprintf(stderr, "Dump of content-transfer-encoding hdr\n");
806     nvp_dump(cte_nvp, stderr);
807     free(cte_nvp);
808   }
809 #endif
810 
811   if (body_start > after_end) {
812     /* This is a (maliciously?) b0rken attachment, e.g. maybe empty */
813     if (verbose) {
814       fprintf(stderr, "Message %s contains an invalid attachment, length=%d bytes\n",
815           format_msg_src(src), (int)(after_end - start));
816     }
817   } else {
818     body_len = after_end - body_start;
819     /* Ignore errors in nested body parts. */
820     do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, atts, NULL);
821   }
822 
823   /* Free header memory */
824   for (x=header.next; x!=&header; x=nx) {
825     nx = x->next;
826     free(x->text);
827     free(x);
828   }
829 
830   if (ct_nvp) free_nvp(ct_nvp);
831   if (cte_nvp) free_nvp(cte_nvp);
832   if (cd_nvp) free_nvp(cd_nvp);
833 }
834 /*}}}*/
835 /*{{{ do_multipart() */
/* Split a multipart body at its boundary lines and hand each part to
 * do_attachment().
 *
 *   src             - message being processed (for diagnostics)
 *   input/input_len - the decoded multipart body
 *   boundary        - boundary string from the Content-Type header (without
 *                     the leading "--"); NULL is reported as an error
 *   atts            - list sentinel that new attachments are queued on
 *   error           - if non-NULL, set to DTR8_MULTIPART_SANS_BOUNDARY when
 *                     there is no boundary string, or DTR8_MISSING_END when
 *                     the input runs out before an end boundary is found
 *
 * Pointer roles: b0 is the previously accepted boundary (NULL until the first
 * one is found), b1 the boundary currently being examined, be one past the
 * end of the input and bx the scan cursor.
 *
 * NOTE(review): the strchr()/strlen() calls below are not limited by
 * input_len, so they appear to rely on the input being NUL-terminated. */
static void do_multipart(struct msg_src *src,
    char *input, int input_len,
    const char *boundary,
    struct attachment *atts,
    enum data_to_rfc822_error *error)
{
  char *b0, *b1, *be, *bx;
  char *line_after_b0, *start_b1_search_from;
  int boundary_len;
  int looking_at_end_boundary;

  if (!boundary) {
    fprintf(stderr, "Can't process multipart message %s with no boundary string\n",
        format_msg_src(src));
    if (error) *error = DTR8_MULTIPART_SANS_BOUNDARY;
    return;
  }

  boundary_len = strlen(boundary);

  b0 = NULL;
  line_after_b0 = input;
  be = input + input_len;

  do {
    int boundary_ok;
    start_b1_search_from = line_after_b0;
    do {
      /* reject boundaries that aren't a whole line */
      b1 = NULL;
      /* Look for "--" immediately followed by the boundary string */
      for (bx = start_b1_search_from; bx < be - (boundary_len + 2); bx++) {
        if (bx[0] == '-' && bx[1] == '-' &&
            !strncmp(bx+2, boundary, boundary_len)) {
          b1 = bx;
          break;
        }
      }
      if (!b1) {
        /* Ran out of input without meeting an end boundary */
        if (error)
          *error = DTR8_MISSING_END;
        return;
      }

      /* An end boundary is the boundary followed directly by "--" */
      looking_at_end_boundary = (b1+boundary_len+3 < be) && (b1[boundary_len+2] == '-' &&
          b1[boundary_len+3] == '-');
      boundary_ok = 1;
      /* Must start a line, unless it sits at the very start of the input */
      if ((b1 > input) && (*(b1-1) != '\n'))
        boundary_ok = 0;
      /* A non-final boundary must be followed at once by "\n" or "\r\n"
       * (only checked when enough input remains after it) */
      if (!looking_at_end_boundary && (b1 + boundary_len + 3 < be) && !(
          ((b1 + boundary_len + 2 < input + input_len) && (*(b1 + boundary_len + 2) == '\n')) ||
          ((b1 + boundary_len + 3 < input + input_len) && (*(b1 + boundary_len + 2) == '\r') && (*(b1 + boundary_len + 3) == '\n'))
      ))
        boundary_ok = 0;
      if (!boundary_ok) {
        /* False match: resume the search at the start of the next line.
         * Note that this exit does not set *error. */
        char *eol = strchr(b1, '\n');
        if (!eol) {
          fprintf(stderr, "Oops, didn't find another normal boundary in %s\n",
              format_msg_src(src));
          return;
        }
        start_b1_search_from = 1 + eol;
      }
    } while (!boundary_ok);

    /* b1 is now looking at a good boundary, which might be the final one */

    if (b0) {
      /* don't treat preamble as an attachment */
      do_attachment(src, line_after_b0, b1, atts);
    }

    /* The next part starts on the line after this boundary */
    b0 = b1;
    line_after_b0 = strchr(b0, '\n');
    if (line_after_b0 == 0)
      line_after_b0 = b0 + strlen(b0);
    else
      ++line_after_b0;
  } while (b1 < be && !looking_at_end_boundary);
}
915 /*}}}*/
/* Parse the date part of an RFC 822 style Date: header value.
 *
 * Expected format: [weekday ,] day-of-month month year hour:minute:second
 * timezone.  Only day, month and year are used; the time of day is ignored
 * and the timezone is not applied (searching is vague enough that the
 * nearest day will do), so the result is mktime() of 00:00:00 on that day.
 * Something must follow the year, but it is not examined.
 *
 * A two-digit year below 70 is taken as 20xx, other values below 1900 as
 * years since 1900, and values from 1900 up as full years.
 *
 * Returns (time_t) -1 if the string cannot be parsed.
 *
 * Some of the ideas, sanity checks etc taken from parse.c in the mutt
 * sources, credit to Michael R. Elkins et al. */
static time_t parse_rfc822_date(char *date_string)/*{{{*/
{
  static const char month_names[12][4] = {
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec"
  };
  struct tm tm;
  char *s, *z;
  int mon;

  /* Start from a fully defined struct; only some fields are set below */
  memset(&tm, 0, sizeof(tm));

  s = date_string;
  z = strchr(s, ',');
  if (z) s = z + 1; /* skip the optional weekday */
  while (*s && isspace((unsigned char) *s)) s++;
  /* Should now be looking at day number */
  if (!isdigit((unsigned char) *s)) goto tough_cheese;
  tm.tm_mday = atoi(s);
  if (tm.tm_mday > 31) goto tough_cheese;

  while (isdigit((unsigned char) *s)) s++;
  while (*s && isspace((unsigned char) *s)) s++;
  if (!*s) goto tough_cheese;
  for (mon = 0; mon < 12; mon++) {
    if (!strncasecmp(s, month_names[mon], 3)) break;
  }
  if (mon == 12) goto tough_cheese;
  tm.tm_mon = mon;

  /* Skip the rest of the month word, stopping at the end of the string so
   * that a value ending right after the month is not read beyond its
   * terminator. */
  while (*s && !isspace((unsigned char) *s)) s++;
  while (*s && isspace((unsigned char) *s)) s++;
  if (!isdigit((unsigned char) *s)) goto tough_cheese;
  tm.tm_year = atoi(s);
  if (tm.tm_year < 70) {
    tm.tm_year += 100;
  } else if (tm.tm_year >= 1900) {
    tm.tm_year -= 1900;
  }

  while (isdigit((unsigned char) *s)) s++;
  while (*s && isspace((unsigned char) *s)) s++;
  if (!*s) goto tough_cheese;

  /* Now looking at hms */
  /* For now, forget this.  The searching will be vague enough that nearest day is good enough. */

  tm.tm_hour = 0;
  tm.tm_min = 0;
  tm.tm_sec = 0;
  tm.tm_isdst = 0;
  return mktime(&tm);

tough_cheese:
  return (time_t) -1; /* default value */
}
/*}}}*/
979 
scan_status_flags(const char * s,struct headers * hdrs)980 static void scan_status_flags(const char *s, struct headers *hdrs)/*{{{*/
981 {
982   const char *p;
983   for (p=s; *p; p++) {
984     switch (*p) {
985       case 'R': hdrs->flags.seen = 1; break;
986       case 'A': hdrs->flags.replied = 1; break;
987       case 'F': hdrs->flags.flagged = 1; break;
988       default: break;
989     }
990   }
991 }
992 /*}}}*/
993 
994 /*{{{ data_to_rfc822() */
data_to_rfc822(struct msg_src * src,char * data,int length,enum data_to_rfc822_error * error)995 struct rfc822 *data_to_rfc822(struct msg_src *src,
996     char *data, int length,
997     enum data_to_rfc822_error *error)
998 {
999   struct rfc822 *result;
1000   char *body_start;
1001   struct line header;
1002   struct line *x, *nx;
1003   struct nvp *ct_nvp, *cte_nvp, *cd_nvp, *nvp;
1004   int body_len;
1005 
1006   if (error) *error = DTR8_OK; /* default */
1007   result = new(struct rfc822);
1008   init_headers(&result->hdrs);
1009   result->atts.next = result->atts.prev = &result->atts;
1010 
1011   if (split_and_splice_header(src, data, &header, &body_start) < 0) {
1012     if (verbose) {
1013       fprintf(stderr, "Giving up on message %s with bad header\n",
1014           format_msg_src(src));
1015     }
1016     if (error) *error = DTR8_BAD_HEADERS;
1017     return NULL;
1018   }
1019 
1020   /* Extract key headers {{{*/
1021   ct_nvp = cte_nvp = cd_nvp = NULL;
1022   for (x=header.next; x!=&header; x=x->next) {
1023     if      (match_string("to:", x->text))
1024       copy_or_concat_header_value(&result->hdrs.to, x->text);
1025     else if (match_string("cc:", x->text))
1026       copy_or_concat_header_value(&result->hdrs.cc, x->text);
1027     else if (!result->hdrs.from && match_string("from:", x->text))
1028       result->hdrs.from = copy_header_value(x->text);
1029     else if (!result->hdrs.subject && match_string("subject:", x->text))
1030       result->hdrs.subject = copy_header_value(x->text);
1031     else if (!ct_nvp && (nvp = make_nvp(src, x->text, "content-type:")))
1032       ct_nvp = nvp;
1033     else if (!cte_nvp && (nvp = make_nvp(src, x->text, "content-transfer-encoding:")))
1034       cte_nvp = nvp;
1035     else if (!cd_nvp && (nvp = make_nvp(src, x->text, "content-disposition:")))
1036       cd_nvp = nvp;
1037     else if (!result->hdrs.date && match_string("date:", x->text)) {
1038       char *date_string = copy_header_value(x->text);
1039       result->hdrs.date = parse_rfc822_date(date_string);
1040       free(date_string);
1041     } else if (!result->hdrs.message_id && match_string("message-id:", x->text))
1042       result->hdrs.message_id = copy_header_value(x->text);
1043     else if (!result->hdrs.in_reply_to && match_string("in-reply-to:", x->text))
1044       result->hdrs.in_reply_to = copy_header_value(x->text);
1045     else if (!result->hdrs.references && match_string("references:", x->text))
1046       result->hdrs.references = copy_header_value(x->text);
1047     else if (match_string("status:", x->text))
1048       scan_status_flags(x->text + sizeof("status:"), &result->hdrs);
1049     else if (match_string("x-status:", x->text))
1050       scan_status_flags(x->text + sizeof("x-status:"), &result->hdrs);
1051   }
1052 /*}}}*/
1053 
1054   /* Process body */
1055   body_len = length - (body_start - data);
1056   do_body(src, body_start, body_len, ct_nvp, cte_nvp, cd_nvp, &result->atts, error);
1057 
1058   /* Free header memory */
1059   for (x=header.next; x!=&header; x=nx) {
1060     nx = x->next;
1061     free(x->text);
1062     free(x);
1063   }
1064 
1065   if (ct_nvp) free_nvp(ct_nvp);
1066   if (cte_nvp) free_nvp(cte_nvp);
1067   if (cd_nvp) free_nvp(cd_nvp);
1068 
1069   return result;
1070 
1071 }
1072 /*}}}*/
1073 
/* Values held in data_alloc_type.  free_ro_mapping() consults it to decide
 * how a buffer handed out by create_ro_mapping() is to be released. */
#define ALLOC_NONE   1  /* free_ro_mapping() leaves the buffer alone */
#define ALLOC_MMAP   2  /* free_ro_mapping() calls munmap() on it */
#define ALLOC_MALLOC 3  /* free_ro_mapping() calls free() on it */

/* Set by create_ro_mapping().  It is a single file-wide value, so it
 * describes only the most recently created mapping. */
int data_alloc_type;
1079 
1080 #if USE_GZIP_MBOX || USE_BZIP_MBOX
1081 
/* Size of the buffer, and of each read request, used by create_ro_mapping()
 * when decompressing a file. */
#define SIZE_STEP (8 * 1024 * 1024)

/* Results of get_compression_type(); also stored in struct zFile's 'type'. */
#define COMPRESSION_NONE 0
#define COMPRESSION_GZIP 1
#define COMPRESSION_BZIP 2
1087 
get_compression_type(const char * filename)1088 static int get_compression_type(const char *filename) {/*{{{*/
1089   size_t len = strlen(filename);
1090   int ptr;
1091 
1092 #ifdef USE_GZIP_MBOX
1093   ptr = len - 3;
1094   if (len > 3 && strncasecmp(filename + ptr, ".gz", 3) == 0) {
1095     return COMPRESSION_GZIP;
1096   }
1097 #endif
1098 
1099 #ifdef USE_BZIP_MBOX
1100   ptr = len - 4;
1101   if (len > 3 && strncasecmp(filename + ptr, ".bz2", 4) == 0) {
1102     return COMPRESSION_BZIP;
1103   }
1104 #endif
1105 
1106   return COMPRESSION_NONE;
1107 }
1108 /*}}}*/
1109 
is_compressed(const char * filename)1110 static int is_compressed(const char *filename) {/*{{{*/
1111   return (get_compression_type(filename) != COMPRESSION_NONE);
1112 }
1113 /*}}}*/
1114 
/* Handle for a compressed file opened through xx_zopen(): the library's own
 * handle plus a tag saying which library it belongs to. */
struct zFile {/*{{{*/
  union {
    /* Both gzFile and BZFILE* are defined as void pointers
     * in their respective header files.
     * NOTE(review): that is the original author's statement; check it
     * against the zlib / bzlib headers actually in use.  xx_zopen() tests
     * 'zptr' after storing through 'gzf' or 'bzf', so the code relies on
     * all three members being interchangeable for a null check.
     */
#ifdef USE_GZIP_MBOX
    gzFile gzf;   /* valid when type == COMPRESSION_GZIP */
#endif
#ifdef USE_BZIP_MBOX
    BZFILE *bzf;  /* valid when type == COMPRESSION_BZIP */
#endif
    void *zptr;   /* generic view, used to test for / store a null handle */
  } foo;
  int type;       /* one of the COMPRESSION_* values */
};
1130 /*}}}*/
1131 
xx_zopen(const char * filename,const char * mode)1132 static struct zFile * xx_zopen(const char *filename, const char *mode) {/*{{{*/
1133   struct zFile *zf = new(struct zFile);
1134 
1135   zf->type = get_compression_type(filename);
1136   switch (zf->type) {
1137 #ifdef USE_GZIP_MBOX
1138     case COMPRESSION_GZIP:
1139       zf->foo.gzf = gzopen(filename, "rb");
1140       break;
1141 #endif
1142 #ifdef USE_BZIP_MBOX
1143     case COMPRESSION_BZIP:
1144       zf->foo.bzf = BZ2_bzopen(filename, "rb");
1145       break;
1146 #endif
1147     default:
1148       zf->foo.zptr = NULL;
1149       break;
1150   }
1151 
1152   if (!zf->foo.zptr) {
1153     free(zf);
1154     return 0;
1155   }
1156 
1157   return zf;
1158 }
1159 /*}}}*/
xx_zclose(struct zFile * zf)1160 static void xx_zclose(struct zFile *zf) {/*{{{*/
1161   switch (zf->type) {
1162 #ifdef USE_GZIP_MBOX
1163     case COMPRESSION_GZIP:
1164       gzclose(zf->foo.gzf);
1165       break;
1166 #endif
1167 #ifdef USE_BZIP_MBOX
1168     case COMPRESSION_BZIP:
1169       BZ2_bzclose(zf->foo.bzf);
1170       break;
1171 #endif
1172     default:
1173       zf->foo.zptr = NULL;
1174       break;
1175   }
1176   free(zf);
1177 }
1178 /*}}}*/
/* Read up to 'len' bytes of decompressed data into 'buf'.  The return value
 * is whatever gzread() / BZ2_bzread() reports for the handle's type; a handle
 * of any other type yields 0. */
static int xx_zread(struct zFile *zf, void *buf, int len) {/*{{{*/
#ifdef USE_GZIP_MBOX
  if (zf->type == COMPRESSION_GZIP)
    return gzread(zf->foo.gzf, buf, len);
#endif
#ifdef USE_BZIP_MBOX
  if (zf->type == COMPRESSION_BZIP)
    return BZ2_bzread(zf->foo.bzf, buf, len);
#endif
  return 0;
}
1196 /*}}}*/
1197 #endif
1198 
1199 #if USE_GZIP_MBOX || USE_BZIP_MBOX
/* do we need ROCACHE_SIZE > 1? the code supports any number here */
#define ROCACHE_SIZE 1
/* One cache slot: a read-only mapping of the decompressed contents of a
 * compressed file.  A slot whose 'map' is NULL is treated as unused. */
struct ro_mapping {
  char *filename;      /* copy of the compressed file's name (the lookup key) */
  unsigned char *map;  /* start of the mmap()ed data, NULL when slot is free */
  size_t len;          /* length passed to mmap() / munmap() */
};
/* Set to 1 by add_ro_cache() once ro_mapping_cache has been cleared. */
static int ro_cache_init = 0;
static struct ro_mapping ro_mapping_cache[ROCACHE_SIZE];
1209 
1210 /* find a temp file in the mapping cache.  If nothing is found lasti is
1211  * set to the next slot to use for insertion.  You have to check that slot
1212  * to see if it is currently in use
1213  */
find_ro_cache(const char * filename,int * lasti)1214 static struct ro_mapping *find_ro_cache(const char *filename, int *lasti)
1215 {
1216   int i = 0;
1217   struct ro_mapping *ro = NULL;
1218   if (lasti)
1219     *lasti = 0;
1220   if (!ro_cache_init)
1221     return NULL;
1222   for (i = 0 ; i < ROCACHE_SIZE ; i++) {
1223     ro = ro_mapping_cache + i;
1224     if (!ro->map) {
1225       if (lasti)
1226         *lasti = i;
1227       return NULL;
1228     }
1229     if (strcmp(filename, ro->filename) == 0)
1230       return ro;
1231   }
1232   /* if we're here, the map is full.  They will reuse slot 0 */
1233   return NULL;
1234 }
1235 
1236 /*
1237  * put a new tempfile into the cache.  It is mmaped as part of this function
1238  * so you can safely close the file handle after calling this.
1239  */
add_ro_cache(const char * filename,int fd,size_t len)1240 static struct ro_mapping *add_ro_cache(const char *filename, int fd, size_t len)
1241 {
1242   int i = 0;
1243   struct ro_mapping *ro = NULL;
1244   if (!ro_cache_init) {
1245     memset(&ro_mapping_cache, 0, sizeof(ro_mapping_cache));
1246     ro_cache_init = 1;
1247   }
1248   ro = find_ro_cache(filename, &i);
1249   if (ro) {
1250     fprintf(stderr, "%s already in ro cache\n", filename);
1251     return NULL;
1252   }
1253   ro = ro_mapping_cache + i;
1254   if (ro->map) {
1255     munmap(ro->map, ro->len);
1256     ro->map = NULL;
1257     free(ro->filename);
1258   }
1259   ro->map = (unsigned char *)mmap(0, len, PROT_READ, MAP_SHARED, fd, 0);
1260   if (ro->map == MAP_FAILED) {
1261     ro->map = NULL;
1262     perror("rfc822:mmap");
1263     return NULL;
1264   }
1265   ro->len = len;
1266   ro->filename = new_string(filename);
1267   return ro;
1268 }
1269 #endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */
1270 
create_ro_mapping(const char * filename,unsigned char ** data,int * len)1271 void create_ro_mapping(const char *filename, unsigned char **data, int *len)/*{{{*/
1272 {
1273   struct stat sb;
1274   int fd;
1275 
1276 #if USE_GZIP_MBOX || USE_BZIP_MBOX
1277   struct zFile *zf;
1278 #endif
1279 
1280   if (stat(filename, &sb) < 0)
1281   {
1282     report_error("stat", filename);
1283     *data = NULL;
1284     return;
1285   }
1286 
1287 #if USE_GZIP_MBOX || USE_BZIP_MBOX
1288   if(is_compressed(filename)) {
1289     unsigned char *p;
1290     size_t cur_read;
1291     struct ro_mapping *ro;
1292     FILE *tmpf;
1293 
1294     /* this branch never returns things that are freeable */
1295     data_alloc_type = ALLOC_NONE;
1296     ro = find_ro_cache(filename, NULL);
1297     if (ro) {
1298       *data = ro->map;
1299       *len = ro->len;
1300       return;
1301     }
1302 
1303     if(verbose) {
1304       fprintf(stderr, "Decompressing %s...\n", filename);
1305     }
1306 
1307     tmpf = tmpfile();
1308     if (!tmpf) {
1309       perror("tmpfile");
1310       goto comp_error;
1311     }
1312     zf = xx_zopen(filename, "rb");
1313     if (!zf) {
1314       fprintf(stderr, "Could not open %s\n", filename);
1315       goto comp_error;
1316     }
1317     p = new_array(unsigned char, SIZE_STEP);
1318     cur_read = xx_zread(zf, p, SIZE_STEP);
1319     if (fwrite(p, cur_read, 1, tmpf) != 1) {
1320       fprintf(stderr, "failed writing to temp file for %s\n", filename);
1321       goto comp_error;
1322     }
1323     *len = cur_read;
1324     if (cur_read >= SIZE_STEP) {
1325       while(1) {
1326         int ret;
1327         cur_read = xx_zread(zf, p, SIZE_STEP);
1328         if (cur_read <= 0)
1329           break;
1330         *len += cur_read;
1331         ret = fwrite(p, cur_read, 1, tmpf);
1332         if (ret != 1) {
1333           fprintf(stderr, "failed writing to temp file for %s\n", filename);
1334           goto comp_error;
1335         }
1336       }
1337     }
1338     free(p);
1339     xx_zclose(zf);
1340 
1341     if(*len > 0) {
1342       ro = add_ro_cache(filename, fileno(tmpf), *len);
1343       if (!ro)
1344         goto comp_error;
1345       *data = ro->map;
1346       *len = ro->len;
1347     } else {
1348       *data = NULL;
1349     }
1350     fclose(tmpf);
1351     return;
1352 
1353 comp_error:
1354     *data = NULL;
1355     *len = 0;
1356     if (tmpf)
1357       fclose(tmpf);
1358     return;
1359   }
1360 #endif /* USE_GZIP_MBOX || USE_BZIP_MBOX */
1361 
1362   *len = sb.st_size;
1363   if (*len == 0) {
1364     *data = NULL;
1365     return;
1366   }
1367 
1368   if (!S_ISREG(sb.st_mode)) {
1369     *data = NULL;
1370     return;
1371   }
1372 
1373   fd = open(filename, O_RDONLY);
1374   if (fd < 0)
1375   {
1376     report_error("open", filename);
1377     *data = NULL;
1378     return;
1379   }
1380 
1381   *data = (unsigned char *) mmap(0, *len, PROT_READ, MAP_SHARED, fd, 0);
1382   if (close(fd) < 0)
1383     report_error("close", filename);
1384   if (*data == MAP_FAILED) {
1385     report_error("rfc822:mmap", filename);
1386     *data = NULL;
1387     return;
1388   }
1389   data_alloc_type = ALLOC_MMAP;
1390 }
1391 /*}}}*/
free_ro_mapping(unsigned char * data,int len)1392 void free_ro_mapping(unsigned char *data, int len)/*{{{*/
1393 {
1394   int r;
1395 
1396   if(data_alloc_type == ALLOC_MALLOC) {
1397     free(data);
1398   }
1399 
1400   if(data_alloc_type == ALLOC_MMAP) {
1401     r = munmap(data, len);
1402     if(r < 0) {
1403       fprintf(stderr, "munmap() errord\n");
1404       exit(1);
1405     }
1406   }
1407 }
1408 /*}}}*/
1409 
setup_msg_src(char * filename)1410 static struct msg_src *setup_msg_src(char *filename)/*{{{*/
1411 {
1412   static struct msg_src result;
1413   result.type = MS_FILE;
1414   result.filename = filename;
1415   return &result;
1416 }
1417 /*}}}*/
/* Read the file 'filename' and parse it as a single message.  Returns the
 * parsed message, or NULL when create_ro_mapping() supplied no data (for
 * instance an empty file) or the parser gave up. */
struct rfc822 *make_rfc822(char *filename)/*{{{*/
{
  unsigned char *contents;
  int n_bytes;
  struct rfc822 *parsed;

  create_ro_mapping(filename, &contents, &n_bytes);

  /* Don't process empty files */
  if (!contents)
    return NULL;

  /* For one message per file, ignore missing end boundary condition. */
  parsed = data_to_rfc822(setup_msg_src(filename), (char *) contents, n_bytes, NULL);
  free_ro_mapping(contents, n_bytes);

  return parsed;
}
1442 /*}}}*/
free_rfc822(struct rfc822 * msg)1443 void free_rfc822(struct rfc822 *msg)/*{{{*/
1444 {
1445   struct attachment *a, *na;
1446 
1447   if (!msg) return;
1448 
1449   if (msg->hdrs.to) free(msg->hdrs.to);
1450   if (msg->hdrs.cc) free(msg->hdrs.cc);
1451   if (msg->hdrs.from) free(msg->hdrs.from);
1452   if (msg->hdrs.subject) free(msg->hdrs.subject);
1453   if (msg->hdrs.message_id) free(msg->hdrs.message_id);
1454   if (msg->hdrs.in_reply_to) free(msg->hdrs.in_reply_to);
1455   if (msg->hdrs.references) free(msg->hdrs.references);
1456 
1457   for (a = msg->atts.next; a != &msg->atts; a = na) {
1458     na = a->next;
1459     if (a->filename) free(a->filename);
1460     if (a->ct == CT_MESSAGE_RFC822) {
1461       free_rfc822(a->data.rfc822);
1462     } else {
1463       free(a->data.normal.bytes);
1464     }
1465     free(a);
1466   }
1467   free(msg);
1468 }
1469 /*}}}*/
1470 
1471 #ifdef TEST
1472 
/* Write 'indent' spaces to standard output; nothing is written when
 * 'indent' is zero or negative. */
static void do_indent(int indent)/*{{{*/
{
  int remaining = indent;

  while (remaining > 0) {
    putchar(' ');
    --remaining;
  }
}
1480 /*}}}*/
/* Print one header as "tag: value" on a line of its own, preceded by
 * 'indent' spaces.  Nothing is printed when the value 'x' is NULL. */
static void show_header(char *tag, char *x, int indent)/*{{{*/
{
  if (x == NULL)
    return;

  do_indent(indent);
  printf("%s: %s\n", tag, x);
}
1488 /*}}}*/
show_rfc822(struct rfc822 * msg,int indent)1489 static void show_rfc822(struct rfc822 *msg, int indent)/*{{{*/
1490 {
1491   struct attachment *a;
1492   show_header("From", msg->hdrs.from, indent);
1493   show_header("To", msg->hdrs.to, indent);
1494   show_header("Cc", msg->hdrs.cc, indent);
1495   show_header("Date", msg->hdrs.date, indent);
1496   show_header("Subject", msg->hdrs.subject, indent);
1497 
1498   for (a = msg->atts.next; a != &msg->atts; a=a->next) {
1499     printf("========================\n");
1500     switch (a->ct) {
1501       case CT_TEXT_PLAIN: printf("Attachment type text/plain\n"); break;
1502       case CT_TEXT_HTML: printf("Attachment type text/html\n"); break;
1503       case CT_TEXT_OTHER: printf("Attachment type text/non-plain\n"); break;
1504       case CT_MESSAGE_RFC822: printf("Attachment type message/rfc822\n"); break;
1505       case CT_OTHER: printf("Attachment type other\n"); break;
1506     }
1507     if (a->ct != CT_MESSAGE_RFC822) {
1508       printf("%d bytes\n", a->data.normal.len);
1509     }
1510     if ((a->ct == CT_TEXT_PLAIN) || (a->ct == CT_TEXT_HTML) || (a->ct == CT_TEXT_OTHER)) {
1511       printf("----------\n");
1512       printf("%s\n", a->data.normal.bytes);
1513     }
1514     if (a->ct == CT_MESSAGE_RFC822) {
1515       show_rfc822(a->data.rfc822, indent + 4);
1516     }
1517   }
1518 }
1519 /*}}}*/
1520 
/* Test driver: parse the message file named by the first argument and dump
 * it to standard output.  Returns 0 on success and 1 if no message could be
 * built from the file; a missing argument ends the program through
 * unlock_and_exit(2). */
int main (int argc, char **argv)/*{{{*/
{
  struct rfc822 *msg;

  if (argc < 2) {
    fprintf(stderr, "Need a path\n");
    unlock_and_exit(2);
  }

  msg = make_rfc822(argv[1]);
  if (!msg) {
    /* make_rfc822() yields NULL for empty or unreadable files and for
     * messages whose headers cannot be parsed. */
    fprintf(stderr, "Could not build a message from %s\n", argv[1]);
    return 1;
  }

  show_rfc822(msg, 0);
  free_rfc822(msg);

  return 0;
}
1538 /*}}}*/
1539 #endif /* TEST */
1540