1 /* $Id$
2
3 Part of SWI-Prolog
4
5 Author: Jan Wielemaker
6 E-mail: wielemak@science.uva.nl
7 WWW: http://www.swi-prolog.org
8 Copyright (C): 1985-2005, University of Amsterdam
9
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28
29 #ifdef __WINDOWS__
30 #define HAVE_MALLOC_H 1
31 #endif
32
33 #include <SWI-Stream.h>
34 #include <SWI-Prolog.h>
35 #include <rfc2045.h>
36 #include "error.h"
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <errno.h>
41
/* Drop any max() a system header may have supplied and use our own.
   Beware: the argument that turns out to be the largest is evaluated
   twice, so do not pass expressions with side effects.
*/
#undef max
#define max(a, b) ((a) > (b) ? (a) : (b))
44
45
46 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
47 This module defines an interface to the rfc2045 (MIME) parsing library
48 by Double Precision, Inc, part of the maildrop system.
49
50 Parsing MIME messages is accomplished using a single predicate. This
51 predicate parses the input and returns a complex term holding the
52 various MIME message parts. The mime message is encoded into the
53 following structure:
54
55 mime(Attributes, Data, SubMimeList)
56
57 Where Data is the (decoded) field data returned as an atom, Attributes
58 is a property-list and SubMimeList is a list of mime/3 terms reflecting
59 the sub-parts. Attributes contains the following members:
60
61 # id(Atom)
62 # description(Atom)
63 # language(Atom)
64 # md5(Atom)
65 # type(Atom)
66 # character_set(Atom)
67 # transfer_encoding(Atom)
68 # disposition(Atom)
69 # filename(Atom)
70 # name(Atom)
71 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
72
73 static atom_t ATOM_;
74 static atom_t ATOM_stream;
75 static functor_t FUNCTOR_type1;
76 static functor_t FUNCTOR_transfer_encoding1;
77 static functor_t FUNCTOR_character_set1;
78 static functor_t FUNCTOR_mime3;
79 static functor_t FUNCTOR_id1;
80 static functor_t FUNCTOR_description1;
81 static functor_t FUNCTOR_language1;
82 static functor_t FUNCTOR_md51;
83 static functor_t FUNCTOR_disposition1;
84 static functor_t FUNCTOR_name1;
85 static functor_t FUNCTOR_filename1;
86
87 struct dbuf
88 { char *buf;
89 int size;
90 int allocated;
91 };
92
93 static int
add_data(const char * ndata,size_t len,void * closure)94 add_data(const char *ndata, size_t len, void *closure)
95 { struct dbuf *dbuf = closure;
96
97 if ( dbuf->size + (int)len > dbuf->allocated )
98 { dbuf->allocated = max(dbuf->allocated, max(1024, dbuf->size + (int)len));
99 if ( dbuf->buf )
100 dbuf->buf = realloc(dbuf->buf, dbuf->allocated);
101 else
102 dbuf->buf = malloc(dbuf->allocated);
103
104 if ( !dbuf->buf )
105 { pl_error("mime_parse", 3, NULL, ERR_ERRNO, errno, "add_data", "mime", 0);
106 return -1;
107 }
108 }
109
110 memcpy(dbuf->buf+dbuf->size, ndata, len);
111 dbuf->size += len;
112
113 return 0;
114 }
115
116
117
118 static int
mime_unify_data(term_t data,struct rfc2045 * rfc,const char * buffer)119 mime_unify_data(term_t data, struct rfc2045 *rfc, const char *buffer)
120 { off_t start_pos, end_pos, start_body, nlines, nbodylines;
121 struct dbuf dbuf;
122 int rval;
123
124 dbuf.buf = NULL;
125 dbuf.size = 0;
126 dbuf.allocated = 0;
127
128 rfc2045_mimepos(rfc,
129 &start_pos, &end_pos, &start_body, &nlines, &nbodylines);
130 rfc2045_cdecode_start(rfc, add_data, &dbuf);
131 if ( rfc2045_cdecode(rfc, buffer+start_body, end_pos-start_body) == 0 &&
132 rfc2045_cdecode_end(rfc) == 0 )
133 { rval = PL_unify_atom_nchars(data, dbuf.size, dbuf.buf);
134 } else
135 rval = FALSE;
136
137 if ( dbuf.buf )
138 free(dbuf.buf);
139
140 return rval;
141 }
142
143
144 /* add_attribute() adds a name(value) term to the list if value is provided
145 (i.e. not NULL and non "")
146 */
147
148 static int
add_attribute(term_t list,const char * value,functor_t functor)149 add_attribute(term_t list, const char *value, functor_t functor)
150 { if ( value && value[0] )
151 { term_t h = PL_new_term_ref();
152 int rval;
153
154 rval = PL_unify_list(list, h, list) &&
155 PL_unify_term(h, PL_FUNCTOR, functor, PL_CHARS, value);
156
157 PL_reset_term_refs(h);
158 return rval;
159 }
160
161 return TRUE;
162 }
163
164
165 static int
mime_unify(term_t result,struct rfc2045 * rfc,const char * buffer)166 mime_unify(term_t result, struct rfc2045 *rfc, const char *buffer)
167 { term_t data = PL_new_term_ref();
168 term_t subs = PL_new_term_ref();
169 term_t atts = PL_new_term_ref();
170
171 if ( !PL_unify_term(result,
172 PL_FUNCTOR, FUNCTOR_mime3,
173 PL_TERM, atts,
174 PL_TERM, data,
175 PL_TERM, subs) )
176 return FALSE;
177
178 if ( rfc->isdummy )
179 { if ( !PL_unify_nil(data) ||
180 !PL_unify_nil(atts) )
181 return FALSE;
182 } else
183 { term_t at = PL_copy_term_ref(atts);
184 const char *type, *enc, *cset;
185 const char *disp, *name, *fnam;
186
187 const char *id = rfc2045_content_id(rfc);
188 const char *desc = rfc2045_content_description(rfc);
189 const char *lang = rfc2045_content_language(rfc);
190 const char *md5 = rfc2045_content_md5(rfc);
191
192 rfc2045_mimeinfo(rfc, &type, &enc, &cset);
193 rfc2045_dispositioninfo(rfc, &disp, &name, &fnam);
194
195 if ( !add_attribute(at, type, FUNCTOR_type1) ) return FALSE;
196 if ( !add_attribute(at, enc, FUNCTOR_transfer_encoding1) ) return FALSE;
197 if ( !add_attribute(at, cset, FUNCTOR_character_set1) ) return FALSE;
198 if ( !add_attribute(at, id, FUNCTOR_id1) ) return FALSE;
199 if ( !add_attribute(at, desc, FUNCTOR_description1) ) return FALSE;
200 if ( !add_attribute(at, lang, FUNCTOR_language1) ) return FALSE;
201 if ( !add_attribute(at, disp, FUNCTOR_disposition1) ) return FALSE;
202 if ( !add_attribute(at, name, FUNCTOR_name1) ) return FALSE;
203 if ( !add_attribute(at, fnam, FUNCTOR_filename1) ) return FALSE;
204 if ( !add_attribute(at, md5, FUNCTOR_md51) ) return FALSE;
205
206 if ( !PL_unify_nil(at) )
207 return FALSE;
208 }
209
210 if ( rfc->firstpart )
211 { term_t st = PL_copy_term_ref(subs);
212 term_t s = PL_new_term_ref();
213 struct rfc2045 *sub;
214
215 if ( !PL_unify_atom(data, ATOM_) )
216 return FALSE;
217
218 for(sub=rfc->firstpart; sub; sub = sub->next)
219 { if ( sub->isdummy )
220 continue;
221
222 if ( !PL_unify_list(st, s, st) ||
223 !mime_unify(s, sub, buffer) )
224 return FALSE;
225 }
226 return PL_unify_nil(st);
227 } else
228 { if ( !PL_unify_nil(subs) ||
229 !mime_unify_data(data, rfc, buffer) )
230 return FALSE;
231 }
232
233 return TRUE;
234 }
235
236
237 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
238 get_character_data()
239 Get a buffer of data from a specification. Currently the following
240 specs are acceptable:
241
242 stream(Stream) All data from this stream
243 stream(Stream, N) At most N characters from stream
244 Atom, String, CodeList Data from native Prolog character data
245 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
246
247 static int
get_character_data(term_t from,char ** data,size_t * len,int * malloced)248 get_character_data(term_t from, char **data, size_t *len, int *malloced)
249 { atom_t name;
250 int arity;
251 char *buf;
252 size_t size;
253
254 if ( PL_get_name_arity(from, &name, &arity) && arity > 0 )
255 { if ( name == ATOM_stream )
256 { IOSTREAM *stream;
257 term_t arg = PL_new_term_ref();
258
259 _PL_get_arg(1, from, arg);
260 if ( !PL_get_stream_handle(arg, &stream) )
261 return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "stream");
262
263 if ( arity == 1 ) /* stream(Stream) */
264 { int c;
265 size_t done, allocated = 1024;
266
267 if ( !(buf = malloc(allocated)) )
268 return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
269
270 for( done=0; (c=Sgetcode(stream)) != EOF; )
271 { if ( done >= allocated )
272 { allocated *= 2;
273
274 if ( !(buf = realloc(buf, allocated)) )
275 return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
276 }
277
278 buf[done++] = c;
279 }
280
281 *len = done;
282 *data = buf;
283 *malloced = TRUE;
284
285 return TRUE;
286 } else if ( arity == 2 ) /* stream(Stream, Length) */
287 { long size;
288 long done;
289 int c;
290
291 _PL_get_arg(2, from, arg);
292 if ( !PL_get_long(arg, &size) || size < 0 )
293 return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, arg, "natural");
294
295 if ( !(buf = malloc(size)) )
296 return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
297
298 for( done=0; (c=Sgetcode(stream)) != EOF && done < size; )
299 buf[done++] = c;
300
301 *len = done;
302 *data = buf;
303 *malloced = TRUE;
304
305 return TRUE;
306 }
307 }
308 } else if ( PL_get_nchars(from, &size, data, CVT_ATOM|CVT_STRING|CVT_LIST) )
309 { *len = size;
310 *malloced = FALSE;
311
312 return TRUE;
313 }
314
315 return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "data");
316 }
317
318
319
320 foreign_t
mime_parse(term_t handle,term_t result)321 mime_parse(term_t handle, term_t result)
322 { char *buf;
323 size_t len = 0;
324 int malloced = FALSE;
325 struct rfc2045 *rfc;
326 int rval;
327
328 if ( !get_character_data(handle, &buf, &len, &malloced) )
329 return FALSE;
330
331 rfc = rfc2045_alloc();
332 rfc2045_parse(rfc, buf, len);
333 rval = mime_unify(result, rfc, buf);
334
335 if ( malloced )
336 free(buf);
337 rfc2045_free(rfc);
338
339 return rval;
340 }
341
342 /*******************************
343 * ERRORS *
344 *******************************/
345
346 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
347 Not typically elegant, but the documentation whishes us to call exit(),
348 which is even worse.
349 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
350
351 void
rfc2045_error(const char * errmsg)352 rfc2045_error(const char *errmsg)
353 { term_t e = PL_new_term_ref();
354
355 if ( (e=PL_new_term_ref()) &&
356 PL_unify_term(e,
357 PL_FUNCTOR_CHARS, "error", 2,
358 PL_FUNCTOR_CHARS, "mime", 1,
359 PL_CHARS, errmsg,
360 PL_VARIABLE) )
361 PL_throw(e);
362
363 PL_fatal_error("Could not recover from rfc2045 error");
364 }
365
366 /*******************************
367 * INSTALL *
368 *******************************/
369
370 #define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
371
372
373 install_t
install_mime()374 install_mime()
375 { ATOM_ = PL_new_atom("");
376 ATOM_stream = PL_new_atom("stream");
377
378 FUNCTOR_type1 = mkfunctor("type", 1);
379 FUNCTOR_transfer_encoding1 = mkfunctor("transfer_encoding", 1);
380 FUNCTOR_character_set1 = mkfunctor("character_set", 1);
381 FUNCTOR_mime3 = mkfunctor("mime", 3);
382 FUNCTOR_id1 = mkfunctor("id", 1);
383 FUNCTOR_description1 = mkfunctor("description", 1);
384 FUNCTOR_language1 = mkfunctor("language", 1);
385 FUNCTOR_md51 = mkfunctor("md5", 1);
386 FUNCTOR_disposition1 = mkfunctor("disposition", 1);
387 FUNCTOR_name1 = mkfunctor("name", 1);
388 FUNCTOR_filename1 = mkfunctor("filename", 1);
389
390 PL_register_foreign("mime_parse", 2, mime_parse, 0);
391 }
392