1 /* message.h -- Message parsing
2  *
3  * Copyright (c) 1994-2010 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #ifndef INCLUDED_MESSAGE_H
44 #define INCLUDED_MESSAGE_H
45 
46 #include <stdio.h>
47 
48 #include "prot.h"
49 #include "mailbox.h"
50 #include "strarray.h"
51 #include "util.h"
52 #include "charset.h"
53 
54 /* (draft standard) MIME tspecials, as listed in RFC 2045 section 5.1 */
55 #define MIME_TSPECIALS "()<>@,;:\\\"/[]?="
56 
57 /*
58  * Parsed form of a body-part; nested parts are reached through 'subpart'
59  */
60 struct body {
61     /* Content-* header information */
62     char *type;
63     char *subtype;
64     struct param *params;       /* Content-Type parameter list */
65     char *id;
66     char *description;
67     char *encoding;
68     char *md5;
69     char *disposition;
70     struct param *disposition_params;
71     struct param *language;
72     char *location;
73     int charset_enc;            /* encoding in binary bodystructure */
74     char *charset_id;           /* charset in binary bodystructure */
75     char *part_id;               /* IMAP part identifier of a subpart */
76 
77     /* Location/size information (all stored as 32-bit values) */
78     uint32_t header_offset;
79     uint32_t header_size;
80     uint32_t header_lines;
81     uint32_t content_offset;
82     uint32_t content_size;
83     uint32_t content_lines;
84     uint32_t boundary_size;         /* Size of terminating boundary */
85     uint32_t boundary_lines;
86 
87     /* Content guid - sha1 of binary content */
88     struct message_guid content_guid;
89 
90     int numparts;               /* For multipart types */
91     struct body *subpart;       /* For message/rfc822 and multipart types */
92 
93     uint32_t filesize;          /* to make sure that things match up! */
94 
95     /*
96      * Other header information.
97      * Only meaningful for body-parts at top level or
98      * enclosed in message/rfc822
99      */
100     char *date;
101     char *subject;
102     struct address *from;
103     struct address *sender;
104     struct address *reply_to;
105     struct address *to;
106     struct address *cc;
107     struct address *bcc;
108     char *in_reply_to;
109     char *message_id;
110     char *x_me_message_id;
111     char *references;
112     char *received_date;
113 
114     /*
115      * Cached headers.  Only filled in at top-level
116      */
117     struct buf cacheheaders;
118 
119     /*
120      * Decoded body.  Filled in as needed.
121      */
122     char *decoded_body;
123 
124     /* Decoded content size, or 0 if unknown */
125     uint32_t decoded_content_size;
126 
127     /* Message GUID. Only filled in at top level */
128     struct message_guid guid;
129 };
130 
131 /* Linked list of attribute/value parameters (Content-Type; also used by struct body for disposition_params and language) */
132 struct param {
133     struct param *next;
134     char *attribute;
135     char *value;
136 };
137 extern void param_free(struct param **paramp);   /* takes the address of the list head pointer */
138 
/* Copy from a protstream to a stdio FILE.
 * NOTE(review): the meaning of 'size' and 'allow_null' and the return
 * convention are not visible in this header -- confirm in message.c. */
139 extern int message_copy_strict(struct protstream *from, FILE *to,
140                                unsigned size, int allow_null);
141 
/* Parse the message file named 'fname'; 'record' is passed non-const.
 * NOTE(review): presumably filled in from the parse -- confirm. */
142 extern int message_parse(const char *fname, struct index_record *record);
143 
/* A message's raw data paired with its parsed body structure */
144 struct message_content {
145     const char *base;  /* memory mapped file */
146     size_t len;        /* NOTE(review): presumably the length of the mapping at 'base' -- confirm */
147     struct body *body; /* parsed body structure */
148 };
149 
150 /* MUST keep this struct sync'd with sieve_bodypart in sieve_interface.h (same fields, same order) */
151 struct bodypart {
152     char section[128];          /* fixed-size buffer */
153     const char *decoded_body;
154 };
155 
156 
/* Parsing and cache-writing entry points.  Implementations are not part of
 * this header; the notes below state only what the signatures show. */
157 extern void parse_cached_envelope(char *env, char *tokens[], int tokens_size);  /* 'env' is non-const */
158 
/* NOTE(review): msg_len is 'unsigned long' here but 'size_t' in
 * message_parse_file() below. */
159 extern int message_parse_mapped(const char *msg_base, unsigned long msg_len,
160                                 struct body *body, const char *efname);
161 extern int message_parse_binary_file(FILE *infile, struct body **body,
162                                      const char *efname);
/* msg_base, msg_len and body are pointer-to-pointer/size out-parameters */
163 extern int message_parse_file(FILE *infile,
164                               const char **msg_base, size_t *msg_len,
165                               struct body **body,
166                               const char *efname);
167 extern void message_parse_string(const char *hdr, char **hdrp);
168 extern void message_pruneheader(char *buf, const strarray_t *headers,
169                                 const strarray_t *headers_not);
170 extern void message_fetch_part(struct message_content *msg,
171                                const char **content_types,
172                                struct bodypart ***parts);
/* The message_write_* functions take the destination struct buf as their first argument */
173 extern void message_write_nstring(struct buf *buf, const char *s);
174 extern void message_write_nstring_map(struct buf *buf, const char *s, unsigned int len);
175 extern void message_write_body(struct buf *buf, const struct body *body,
176                                   int newformat);
177 extern void message_write_xdrstring(struct buf *buf, const struct buf *s);
178 extern int message_write_cache(struct index_record *record, const struct body *body);
179 
180 extern int message_create_record(struct index_record *message_index,
181                                  const struct body *body);
182 extern void message_free_body(struct body *body);
183 
/* Parse a Content-Type / Content-Disposition header value.  Results are
 * handed back through the char ** and struct param ** out-parameters.
 * NOTE(review): ownership of the returned strings and lists is not visible
 * from this header -- confirm in message.c. */
184 extern void message_parse_type(const char *hdr, char **typep, char **subtypep, struct param **paramp);
185 extern void message_parse_disposition(const char *hdr, char **hdrp, struct param **paramp);
186 
187 /* NOTE - scribbles on its input (the buffer at 'str' is modified) */
188 extern void message_parse_env_address(char *str, struct address *addr);
189 
190 extern char *parse_nstring(char **str);   /* 'str' is passed by address */
191 
192 extern void message_read_bodystructure(const struct index_record *record,
193                                        struct body **body);
194 
/* NOTE(review): parameters are unnamed in this prototype; see the definition for their roles */
195 extern int message_update_conversations(struct conversations_state *, struct mailbox *, struct index_record *, conversation_t **);
196 
197 /* Call proc for each header in headers, which must contain valid
198  * MIME header bytes. Header keys and values passed to the callback
199  * are zero-terminated strings. Header values are not unfolded and
200  * omit the terminating CR LF sequence. 'rock' is passed to proc. */
201 extern int message_foreach_header(const char *headers, size_t len,
202                    int (*proc)(const char *key, const char *val, void *rock),
203                    void *rock);
204 
205 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
206 /* New message API */
207 
208 typedef struct message message_t;   /* opaque here: struct message is not defined in this header */
209 struct mailbox;                     /* forward declaration */
210 
211 /* Values for use as the 'format' argument to message_get_field(). */
212 enum message_format
213 {
214     /* Original raw octets from the on-the-wire RFC5322 format,
215      * including folding and RFC2047 encoding of non-ASCII characters.
216      * The result may point into a mapping and not be NUL-terminated,
217      * use buf_cstring() if necessary.  */
218     MESSAGE_RAW=        1,
219     /* Unfolded and RFC2047 decoded */
220     MESSAGE_DECODED,
221     /* Unfolded, RFC2047 decoded, and HTML-escaped */
222     MESSAGE_SNIPPET,
223     /* Unfolded, RFC2047 decoded, and search-normalised */
224     MESSAGE_SEARCH,
225 
    /* Mask for the low three bits, which hold the format values above
     * (1..4); the flag bits below start at bit 5.
     * NOTE(review): a leading underscore followed by an uppercase letter
     * is a reserved identifier spelling in C. */
226 #define _MESSAGE_FORMAT_MASK    (0x7)
227 
228     /* This flag can be OR'd into the format argument to request that
229      * the field name and a colon ':' are left in the result.  Normally
230      * only the field value is returned.  This is useful when calling
231      * multiple times with MESSAGE_APPEND, to accumulate multiple headers
232      * in the buffer. */
233     MESSAGE_FIELDNAME=          (1<<5),
234 
235     /* This flag can be OR'd into the format argument to request that
236      * all the fields of the given name are returned.  Normally only
237      * the first is returned, which is faster. */
238     /* XXX this flag is not implemented, but is effectively always set! */
239     MESSAGE_MULTIPLE=           (1<<6),
240 
241     /* This flag can be OR'd into the format argument to request that
242      * results be appended to the buffer; normally the buffer is reset
243      * first. */
244     MESSAGE_APPEND=             (1<<7),
245 
246     /* This flag can be OR'd into the format argument to request that
247      * leading and trailing space be trimmed from the buffer */
248     MESSAGE_TRIM=               (1<<8),
249 
250     /* This flag can be OR'd into the format argument to request that
251      * only the last field of the given name is returned.  Normally only
252      * the first is returned, which is faster. */
253     MESSAGE_LAST=               (1<<9),
254 };
255 
/* Bit flags.  NOTE(review): presumably the values carried by the
 * 'indexflags' argument of message_new_from_index() and
 * message_set_from_index() -- confirm against callers. */
256 enum message_indexflags
257 {
258     MESSAGE_SEEN=               (1<<0),
259     MESSAGE_RECENT=             (1<<1),
260 };
261 
/* message_new*() return a message_t pointer.  Each message_set_from_*()
 * takes the same source arguments as the matching message_new_from_*()
 * plus an existing message_t as its last argument, and returns nothing.
 * There is no message_set_from_filename() declared here. */
262 extern message_t *message_new(void);
263 extern message_t *message_new_from_data(const char *base, size_t len);
264 extern message_t *message_new_from_mailbox(struct mailbox *mailbox,
265                                            unsigned int recno);
266 extern message_t *message_new_from_record(struct mailbox *,
267                                           const struct index_record *);
268 extern message_t *message_new_from_index(struct mailbox *,
269                                          const struct index_record *,
270                                          uint32_t msgno,
271                                          uint32_t indexflags);
272 extern message_t *message_new_from_filename(const char *filename);
273 extern void message_set_from_data(const char *base, size_t len,
274                                   message_t *m);
275 extern void message_set_from_mailbox(struct mailbox *mailbox,
276                                      unsigned int recno,
277                                      message_t *m);
278 extern void message_set_from_index(struct mailbox *,
279                                    const struct index_record *,
280                                    uint32_t msgno,
281                                    uint32_t indexflags,
282                                    message_t *m);
283 extern void message_set_from_record(struct mailbox *,
284                                     const struct index_record *,
285                                     message_t *m);
286 
287 extern int message_get_indexversion(message_t *m, uint32_t *versionp);
288 
/* NOTE(review): presumably reference counting; message_unref() takes the
 * address of the caller's pointer -- confirm semantics in message.c. */
289 extern message_t *message_ref(message_t *m);
290 extern void message_unref(message_t **m);
291 
/* Accessors.  Each returns int and takes a pointer argument after the
 * message: a struct buf, a typed pointer, or a callback.
 * NOTE(review): the return convention is not visible in this header --
 * confirm in message.c. */
/* 'format' takes an enum message_format value, optionally OR'd with its flag bits */
292 extern int message_get_field(message_t *m, const char *name,
293                              int format, struct buf *buf);
294 extern int message_get_cachebody(message_t *m, const struct body **bodyp);
295 extern int message_get_body(message_t *m, struct buf *buf);
296 extern int message_get_type(message_t *m, const char **strp);
297 extern int message_get_subtype(message_t *m, const char **strp);
298 extern int message_get_charset_id(message_t *m, const char **strp);
299 extern int message_get_encoding(message_t *m, int *encp);
300 extern int message_get_num_parts(message_t *m, unsigned int *np);
301 extern int message_get_messageid(message_t *m, struct buf *buf);
302 extern int message_get_listid(message_t *m, struct buf *buf);
303 extern int message_get_mailinglist(message_t *m, struct buf *buf);
304 extern int message_get_from(message_t *m, struct buf *buf);
305 extern int message_get_to(message_t *m, struct buf *buf);
306 extern int message_get_cc(message_t *m, struct buf *buf);
307 extern int message_get_bcc(message_t *m, struct buf *buf);
308 extern int message_get_inreplyto(message_t *m, struct buf *buf);
309 extern int message_get_references(message_t *m, struct buf *buf);
310 extern int message_get_subject(message_t *m, struct buf *buf);
311 extern int message_get_gmtime(message_t *m, time_t *tp);
312 extern int message_get_mailbox(message_t *m, struct mailbox **);
313 extern int message_get_uid(message_t *m, uint32_t *uidp);
314 extern int message_get_cid(message_t *m, conversation_id_t *cidp);
315 extern int message_get_guid(message_t *m, const struct message_guid **guidp);
316 extern int message_get_internaldate(message_t *m, time_t *);
317 extern int message_get_spamscore(message_t *m, uint32_t *scorep);
318 extern int message_get_savedate(message_t *m, time_t *);
319 extern int message_get_sentdate(message_t *m, time_t *);
320 extern int message_get_modseq(message_t *m, modseq_t *modseqp);
321 extern int message_get_internalflags(message_t *m, uint32_t *flagsp);
322 extern int message_get_systemflags(message_t *m, uint32_t *);
323 extern int message_get_userflags(message_t *m, uint32_t *flagsp);
324 extern int message_get_indexflags(message_t *m, uint32_t *);
325 extern int message_get_size(message_t *m, uint32_t *sizep);
326 extern int message_get_msgno(message_t *m, uint32_t *msgnop);
327 extern int message_get_fname(message_t *m, const char **fnamep);
/* 'rock' is passed to proc as its last argument.
 * NOTE(review): presumably proc is called once per section -- confirm in message.c. */
328 extern int message_foreach_section(message_t *m,
329                    int (*proc)(int isbody, charset_t charset, int encoding,
330                                const char *type, const char *subtype,
331                                const struct param *type_params,
332                                const char *disposition,
333                                const struct param *disposition_params,
334                                const struct message_guid *content_guid,
335                                const char *part,
336                                struct buf *data,
337                                void *rock),
338                    void *rock);
339 extern int message_get_leaf_types(message_t *m, strarray_t *types);
340 
341 
342 /* Simpler accessor interface: takes a const message_t and returns the value directly, with no out-parameter */
343 extern const struct index_record *msg_record(const message_t *m);
344 extern uint32_t msg_size(const message_t *m);
345 extern uint32_t msg_uid(const message_t *m);
346 extern conversation_id_t msg_cid(const message_t *m);
347 extern modseq_t msg_modseq(const message_t *m);
348 extern int msg_msgno(const message_t *m);   /* NOTE(review): int here, while message_get_msgno() uses uint32_t */
349 extern const struct message_guid *msg_guid(const message_t *m);
350 
351 /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
352 
353 #endif /* INCLUDED_MESSAGE_H */
354