1 /*  $Id$
2 
3     Part of SWI-Prolog
4 
5     Author:        Jan Wielemaker
6     E-mail:        wielemak@science.uva.nl
7     WWW:           http://www.swi-prolog.org
8     Copyright (C): 1985-2005, University of Amsterdam
9 
10     This library is free software; you can redistribute it and/or
11     modify it under the terms of the GNU Lesser General Public
12     License as published by the Free Software Foundation; either
13     version 2.1 of the License, or (at your option) any later version.
14 
15     This library is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18     Lesser General Public License for more details.
19 
20     You should have received a copy of the GNU Lesser General Public
21     License along with this library; if not, write to the Free Software
22     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef __WINDOWS__
#define HAVE_MALLOC_H 1
#endif

#include <SWI-Stream.h>
#include <SWI-Prolog.h>
#include <rfc2045.h>
#include "error.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
41 
/* Drop any max() a system header may have supplied and install our own.
   Note that each argument may be evaluated twice.
*/
#undef max
#define max(a, b) ((a)>(b) ? (a) : (b))
44 
45 
46 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
47 This module defines an interface to   the rfc2045 (MIME) parsing library
48 by Double Precision, Inc, part of the maildrop system.
49 
50 Parsing MIME messages is accomplished  using   a  single predicate. This
51 predicate parses the input  and  returns   a  complex  term  holding the
52 various MIME message  parts.  The  mime   message  is  encoded  into the
53 following structure:
54 
55 	mime(Attributes, Data, SubMimeList)
56 
57 Where Data is the (decoded) field data   returned as an atom, Attributes
58 is a property-list and SubMimeList is a  list of mime/3 terms reflecting
59 the sub-parts. Attributes contains the following members:
60 
61 	# id(Atom)
62 	# description(Atom)
63 	# language(Atom)
64 	# md5(Atom)
65 	# type(Atom)
66 	# character_set(Atom)
67 	# transfer_encoding(Atom)
68 	# disposition(Atom)
69 	# filename(Atom)
70 	# name(Atom)
71 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
72 
73 static atom_t	 ATOM_;
74 static atom_t	 ATOM_stream;
75 static functor_t FUNCTOR_type1;
76 static functor_t FUNCTOR_transfer_encoding1;
77 static functor_t FUNCTOR_character_set1;
78 static functor_t FUNCTOR_mime3;
79 static functor_t FUNCTOR_id1;
80 static functor_t FUNCTOR_description1;
81 static functor_t FUNCTOR_language1;
82 static functor_t FUNCTOR_md51;
83 static functor_t FUNCTOR_disposition1;
84 static functor_t FUNCTOR_name1;
85 static functor_t FUNCTOR_filename1;
86 
/* Growable byte buffer filled by add_data() */

struct dbuf
{ char *buf;                            /* collected bytes, NULL until first use */
  int   size;                           /* number of bytes stored in buf */
  int   allocated;                      /* number of bytes buf can hold */
};
92 
93 static int
add_data(const char * ndata,size_t len,void * closure)94 add_data(const char *ndata, size_t len, void *closure)
95 { struct dbuf *dbuf = closure;
96 
97   if ( dbuf->size + (int)len > dbuf->allocated )
98   { dbuf->allocated = max(dbuf->allocated, max(1024, dbuf->size + (int)len));
99     if ( dbuf->buf )
100       dbuf->buf = realloc(dbuf->buf, dbuf->allocated);
101     else
102       dbuf->buf = malloc(dbuf->allocated);
103 
104     if ( !dbuf->buf )
105     { pl_error("mime_parse", 3, NULL, ERR_ERRNO, errno, "add_data", "mime", 0);
106       return -1;
107     }
108   }
109 
110   memcpy(dbuf->buf+dbuf->size, ndata, len);
111   dbuf->size += len;
112 
113   return 0;
114 }
115 
116 
117 
118 static int
mime_unify_data(term_t data,struct rfc2045 * rfc,const char * buffer)119 mime_unify_data(term_t data, struct rfc2045 *rfc, const char *buffer)
120 { off_t start_pos, end_pos, start_body, nlines, nbodylines;
121   struct dbuf dbuf;
122   int rval;
123 
124   dbuf.buf       = NULL;
125   dbuf.size      = 0;
126   dbuf.allocated = 0;
127 
128   rfc2045_mimepos(rfc,
129 		  &start_pos, &end_pos, &start_body, &nlines, &nbodylines);
130   rfc2045_cdecode_start(rfc, add_data, &dbuf);
131   if ( rfc2045_cdecode(rfc, buffer+start_body, end_pos-start_body) == 0 &&
132        rfc2045_cdecode_end(rfc) == 0 )
133   { rval = PL_unify_atom_nchars(data, dbuf.size, dbuf.buf);
134   } else
135     rval = FALSE;
136 
137   if ( dbuf.buf )
138     free(dbuf.buf);
139 
140   return rval;
141 }
142 
143 
144 /* add_attribute() adds a name(value) term to the list if value is provided
145    (i.e. not NULL and non "")
146 */
147 
148 static int
add_attribute(term_t list,const char * value,functor_t functor)149 add_attribute(term_t list, const char *value, functor_t functor)
150 { if ( value && value[0] )
151   { term_t h = PL_new_term_ref();
152     int rval;
153 
154     rval = PL_unify_list(list, h, list) &&
155 	   PL_unify_term(h, PL_FUNCTOR, functor, PL_CHARS, value);
156 
157     PL_reset_term_refs(h);
158     return rval;
159   }
160 
161   return TRUE;
162 }
163 
164 
165 static int
mime_unify(term_t result,struct rfc2045 * rfc,const char * buffer)166 mime_unify(term_t result, struct rfc2045 *rfc, const char *buffer)
167 { term_t data = PL_new_term_ref();
168   term_t subs = PL_new_term_ref();
169   term_t atts = PL_new_term_ref();
170 
171   if ( !PL_unify_term(result,
172 		      PL_FUNCTOR, FUNCTOR_mime3,
173 		        PL_TERM, atts,
174 		        PL_TERM, data,
175 		        PL_TERM, subs) )
176     return FALSE;
177 
178   if ( rfc->isdummy )
179   { if ( !PL_unify_nil(data) ||
180 	 !PL_unify_nil(atts) )
181       return FALSE;
182   } else
183   { term_t at = PL_copy_term_ref(atts);
184     const char *type, *enc, *cset;
185     const char *disp, *name, *fnam;
186 
187     const char *id   = rfc2045_content_id(rfc);
188     const char *desc = rfc2045_content_description(rfc);
189     const char *lang = rfc2045_content_language(rfc);
190     const char *md5  = rfc2045_content_md5(rfc);
191 
192     rfc2045_mimeinfo(rfc, &type, &enc, &cset);
193     rfc2045_dispositioninfo(rfc, &disp, &name, &fnam);
194 
195     if ( !add_attribute(at, type, FUNCTOR_type1) )              return FALSE;
196     if ( !add_attribute(at, enc,  FUNCTOR_transfer_encoding1) ) return FALSE;
197     if ( !add_attribute(at, cset, FUNCTOR_character_set1) )     return FALSE;
198     if ( !add_attribute(at, id,   FUNCTOR_id1) )                return FALSE;
199     if ( !add_attribute(at, desc, FUNCTOR_description1) )       return FALSE;
200     if ( !add_attribute(at, lang, FUNCTOR_language1) )          return FALSE;
201     if ( !add_attribute(at, disp, FUNCTOR_disposition1) )       return FALSE;
202     if ( !add_attribute(at, name, FUNCTOR_name1) )              return FALSE;
203     if ( !add_attribute(at, fnam, FUNCTOR_filename1) )          return FALSE;
204     if ( !add_attribute(at, md5,  FUNCTOR_md51) )               return FALSE;
205 
206     if ( !PL_unify_nil(at) )
207       return FALSE;
208   }
209 
210   if ( rfc->firstpart )
211   { term_t st = PL_copy_term_ref(subs);
212     term_t s  = PL_new_term_ref();
213     struct rfc2045 *sub;
214 
215     if ( !PL_unify_atom(data, ATOM_) )
216       return FALSE;
217 
218     for(sub=rfc->firstpart; sub; sub = sub->next)
219     { if ( sub->isdummy )
220 	continue;
221 
222       if ( !PL_unify_list(st, s, st) ||
223 	   !mime_unify(s, sub, buffer) )
224 	return FALSE;
225     }
226     return PL_unify_nil(st);
227   } else
228   { if ( !PL_unify_nil(subs) ||
229 	 !mime_unify_data(data, rfc, buffer) )
230       return FALSE;
231   }
232 
233   return TRUE;
234 }
235 
236 
237 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
238 get_character_data()
239     Get a buffer of data from a specification.  Currently the following
240     specs are acceptable:
241 
242 	stream(Stream)		All data from this stream
243 	stream(Stream, N)	At most N characters from stream
244 	Atom, String, CodeList	Data from native Prolog character data
245 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
246 
247 static int
get_character_data(term_t from,char ** data,size_t * len,int * malloced)248 get_character_data(term_t from, char **data, size_t *len, int *malloced)
249 { atom_t name;
250   int arity;
251   char *buf;
252   size_t size;
253 
254   if ( PL_get_name_arity(from, &name, &arity) && arity > 0 )
255   { if ( name == ATOM_stream )
256     { IOSTREAM *stream;
257       term_t arg = PL_new_term_ref();
258 
259       _PL_get_arg(1, from, arg);
260       if ( !PL_get_stream_handle(arg, &stream) )
261 	return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "stream");
262 
263       if ( arity == 1 )			/* stream(Stream) */
264       { int c;
265 	size_t done, allocated = 1024;
266 
267 	if ( !(buf = malloc(allocated)) )
268 	  return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
269 
270 	for( done=0; (c=Sgetcode(stream)) != EOF; )
271 	{ if ( done >= allocated )
272 	  { allocated *= 2;
273 
274 	    if ( !(buf = realloc(buf, allocated)) )
275 	      return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
276 	  }
277 
278 	  buf[done++] = c;
279 	}
280 
281 	*len = done;
282 	*data = buf;
283 	*malloced = TRUE;
284 
285         return TRUE;
286       }	else if ( arity == 2 )		/* stream(Stream, Length) */
287       { long size;
288 	long done;
289 	int c;
290 
291 	_PL_get_arg(2, from, arg);
292 	if ( !PL_get_long(arg, &size) || size < 0 )
293 	  return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, arg, "natural");
294 
295 	if ( !(buf = malloc(size)) )
296 	  return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
297 
298 	for( done=0; (c=Sgetcode(stream)) != EOF && done < size; )
299 	  buf[done++] = c;
300 
301 	*len = done;
302 	*data = buf;
303 	*malloced = TRUE;
304 
305         return TRUE;
306       }
307     }
308   } else if ( PL_get_nchars(from, &size, data, CVT_ATOM|CVT_STRING|CVT_LIST) )
309   { *len = size;
310     *malloced = FALSE;
311 
312     return TRUE;
313   }
314 
315   return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "data");
316 }
317 
318 
319 
320 foreign_t
mime_parse(term_t handle,term_t result)321 mime_parse(term_t handle, term_t result)
322 { char *buf;
323   size_t len = 0;
324   int malloced = FALSE;
325   struct rfc2045 *rfc;
326   int rval;
327 
328   if ( !get_character_data(handle, &buf, &len, &malloced) )
329     return FALSE;
330 
331   rfc = rfc2045_alloc();
332   rfc2045_parse(rfc, buf, len);
333   rval = mime_unify(result, rfc, buf);
334 
335   if ( malloced )
336     free(buf);
337   rfc2045_free(rfc);
338 
339   return rval;
340 }
341 
342 		 /*******************************
343 		 *	       ERRORS		*
344 		 *******************************/
345 
346 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
347 Not typically elegant, but the documentation  whishes us to call exit(),
348 which is even worse.
349 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
350 
351 void
rfc2045_error(const char * errmsg)352 rfc2045_error(const char *errmsg)
353 { term_t e = PL_new_term_ref();
354 
355   if ( (e=PL_new_term_ref()) &&
356        PL_unify_term(e,
357 		     PL_FUNCTOR_CHARS, "error", 2,
358 		       PL_FUNCTOR_CHARS, "mime", 1,
359 		         PL_CHARS, errmsg,
360 		       PL_VARIABLE) )
361     PL_throw(e);
362 
363   PL_fatal_error("Could not recover from rfc2045 error");
364 }
365 
366 		 /*******************************
367 		 *	      INSTALL		*
368 		 *******************************/
369 
370 #define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
371 
372 
373 install_t
install_mime()374 install_mime()
375 { ATOM_			     = PL_new_atom("");
376   ATOM_stream		     = PL_new_atom("stream");
377 
378   FUNCTOR_type1		     = mkfunctor("type", 1);
379   FUNCTOR_transfer_encoding1 = mkfunctor("transfer_encoding", 1);
380   FUNCTOR_character_set1     = mkfunctor("character_set", 1);
381   FUNCTOR_mime3	             = mkfunctor("mime", 3);
382   FUNCTOR_id1                = mkfunctor("id", 1);
383   FUNCTOR_description1       = mkfunctor("description", 1);
384   FUNCTOR_language1          = mkfunctor("language", 1);
385   FUNCTOR_md51               = mkfunctor("md5", 1);
386   FUNCTOR_disposition1       = mkfunctor("disposition", 1);
387   FUNCTOR_name1		     = mkfunctor("name", 1);
388   FUNCTOR_filename1	     = mkfunctor("filename", 1);
389 
390   PL_register_foreign("mime_parse", 2, mime_parse, 0);
391 }
392