1 /*  Part of SWI-Prolog
2 
3     Author:        Jan Wielemaker
4     E-mail:        J.Wielemaker@vu.nl
5     WWW:           http://www.swi-prolog.org
6     Copyright (c)  2014-2016, University of Amsterdam
7                               VU University Amsterdam
8     All rights reserved.
9 
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions
12     are met:
13 
14     1. Redistributions of source code must retain the above copyright
15        notice, this list of conditions and the following disclaimer.
16 
17     2. Redistributions in binary form must reproduce the above copyright
18        notice, this list of conditions and the following disclaimer in
19        the documentation and/or other materials provided with the
20        distribution.
21 
22     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33     POSSIBILITY OF SUCH DAMAGE.
34 */
35 
36 #include <SWI-Stream.h>
37 #include <SWI-Prolog.h>
38 
39 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
40 
41 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
42 
43 #define O_DEBUG 1
44 #include <SWI-Stream.h>
45 #include <SWI-Prolog.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <assert.h>
49 #include <time.h>
50 #include <errno.h>
51 
/* Option-name atoms recognised by multipart_open(); both are created
   in install_multipart().
*/
static atom_t ATOM_close_parent;	/* close_parent(Bool) */
static atom_t ATOM_boundary;		/* boundary(String) */
54 
#ifndef DEBUG
/* Debug verbosity used by DEBUG() below; 0 disables all debug output. */
static int multipart_debug = 0;
/* DEBUG(l, g): execute statement(s) `g` if the debug level is at least
   `l`.  The level argument is parenthesized such that any expression
   can be passed safely.
*/
#define DEBUG(l, g) do { if (multipart_debug >= (l)) {g;} } while(0)
#endif
59 
60 
61 		 /*******************************
62 		 *	       TYPES		*
63 		 *******************************/
64 
65 #define BUFSIZE SIO_BUFSIZE		/* raw I/O buffer */
66 
/* States of the boundary scanner in multipart_parser_execute() */
typedef enum state
{ s_start = 0,				/* initial: reset index, start matching */
  s_start_boundary,			/* matching first boundary + CR LF */
  s_part_data_start,			/* at the first byte of a part */
  s_part_data,				/* inside the data of a part */
  s_part_data_almost_boundary,		/* saw CR inside part data */
  s_part_data_boundary,			/* saw CR LF, matching the boundary */
  s_part_data_almost_end,		/* boundary matched, expect '-' or CR */
  s_part_data_end,			/* boundary + CR: current part is done */
  s_part_data_next,			/* set by multipart_open_next(): expect LF */
  s_part_data_final_hyphen,		/* boundary + '-', expect second '-' */
  s_end					/* closing boundary seen */
} mp_state;
80 
/* Handle of a multipart filter stream: the parent stream we read from
   plus the state of the boundary scanner.
*/
typedef struct multipart_context
{ IOSTREAM	   *stream;		/* Original (parent) stream */
  IOSTREAM	   *multipart_stream;	/* Stream this context is the handle of */
  int		    close_parent;	/* close parent on close */
  IOENC		    parent_encoding;	/* Saved encoding of parent */
  char		   *boundary;		/* Our boundary, prefixed with "--" */
  size_t	    boundary_length;	/* length of the boundary (incl. "--") */
  char		   *lookbehind;		/* Collected part of potential boundary;
					   points into the boundary allocation */
  char		   *unprocessed;	/* Data that did not fit in the output */
  size_t	    unprocessed_len;	/* length of unprocessed data */
  size_t	    index;		/* # boundary chars matched so far */
  mp_state	    state;		/* current state */
} multipart_context;
94 
95 
/* Forward declaration; the parser is defined at the end of this file
   and used by multipart_read().
*/
static size_t
multipart_parser_execute(multipart_context* p,
			 const char *buf, size_t len,
			 char **out, size_t *out_lenp);
100 
101 static multipart_context*
alloc_multipart_context(IOSTREAM * s)102 alloc_multipart_context(IOSTREAM *s)
103 { multipart_context *ctx = malloc(sizeof(*ctx));
104 
105   if ( ctx )
106   { memset(ctx, 0, sizeof(*ctx));
107     ctx->stream       = s;
108   }
109 
110   return ctx;
111 }
112 
113 
114 static void
free_multipart_context(multipart_context * ctx)115 free_multipart_context(multipart_context *ctx)
116 { if ( ctx->stream->upstream )
117     Sset_filter(ctx->stream, NULL);
118   else
119     PL_release_stream(ctx->stream);
120 
121   if ( ctx->boundary )
122     free(ctx->boundary);
123 
124   free(ctx);
125 }
126 
127 
128 		 /*******************************
129 		 *	    MULTIPART I/O	*
130 		 *******************************/
131 
132 static ssize_t
multipart_read(void * handle,char * buf,size_t size)133 multipart_read(void *handle, char *buf, size_t size)
134 { multipart_context *ctx = handle;
135   IOSTREAM *in = ctx->stream;
136   char    *out = buf;
137   size_t  left = size;
138 
139   if ( ctx->unprocessed_len )
140   { size_t len = ctx->unprocessed_len;
141 
142     DEBUG(1, Sdprintf("Unprocessed: %ld\n", (long)len));
143 
144     if ( len > size )
145       len = size;
146     memcpy(buf, ctx->unprocessed, len);
147     ctx->unprocessed_len -= len;
148     ctx->unprocessed += len;
149 
150     return len;
151   }
152 
153   if ( ctx->state == s_end )
154   { return 0;
155   } else if ( ctx->state == s_part_data_end )
156   { return 0;
157   }
158 
159   for(;;)
160   { if ( in->bufp >= in->limitp )
161     { if ( S__fillbuf(in) == EOF )
162       { Sseterr(in, SIO_FERR, "Incomplete multipart/form-data");
163 	return -1;
164       }
165       in->bufp--;
166     }
167 
168     do
169     { size_t processed;
170 
171 #if defined(O_DEBUG) && defined(DEBUG_MULTIPART)
172       char tmp[10000];
173       memcpy(tmp, in->bufp, in->limitp-in->bufp);
174       tmp[in->limitp-in->bufp] = 0;
175 
176       DEBUG(1, Sdprintf("multipart_parser_execute(%ld bytes: \"%s\")\n",
177 			(long)(in->limitp-in->bufp), tmp));
178 #endif
179 
180       processed = multipart_parser_execute(ctx,
181 					   in->bufp, in->limitp-in->bufp,
182 					   &out, &left);
183       DEBUG(1, Sdprintf("processed %ld bytes, state=%d, left=%ld\n",
184 			(long)processed, ctx->state, (long)left));
185 
186       if ( processed == 0 )
187       { Sseterr(in, SIO_FERR, "Invalid multipart/form-data");
188 	return -1;
189       }
190 
191       in->bufp += processed;
192     } while( left > 0 &&
193 	     in->bufp < in->limitp &&
194 	     ctx->state != s_end &&
195 	     ctx->state != s_part_data_end );
196 
197     if ( out > buf || ctx->state == s_end || ctx->state == s_part_data_end )
198     { DEBUG(1, Sdprintf("Reply %ld bytes\n", (long)(out-buf)));
199       return out-buf;
200     }
201   }
202 }
203 
204 
205 static int
multipart_control(void * handle,int op,void * data)206 multipart_control(void *handle, int op, void *data)
207 { multipart_context *ctx = handle;
208 
209   switch(op)
210   { case SIO_SETENCODING:
211       return 0;				/* allow switching encoding */
212     default:
213       if ( ctx->stream->functions->control )
214 	return (*ctx->stream->functions->control)(ctx->stream->handle, op, data);
215       return -1;
216   }
217 }
218 
219 
220 static int
multipart_close(void * handle)221 multipart_close(void *handle)
222 { multipart_context *ctx = handle;
223   int rc = 0;
224 
225   DEBUG(1, Sdprintf("multipart_close() ...\n"));
226 
227   ctx->stream->encoding = ctx->parent_encoding;
228 
229   if ( ctx->close_parent )
230   { IOSTREAM *parent = ctx->stream;
231     int rc2;
232 
233     free_multipart_context(ctx);
234     rc2 = Sclose(parent);
235     if ( rc == 0 )
236       rc = rc2;
237   } else
238   { free_multipart_context(ctx);
239   }
240 
241   return rc;
242 }
243 
244 
/* Function table of the multipart stream.  Streams using this table are
   recognised by multipart_open_next().
   NOTE(review): slot names follow the IOFUNCTIONS layout of SWI-Stream.h
   (read, write, seek, close, control, seek64) -- confirm.
*/
static IOFUNCTIONS multipart_functions =
{ multipart_read,			/* read */
  NULL,					/* write */
  NULL,					/* seek */
  multipart_close,			/* close */
  multipart_control,			/* control */
  NULL,					/* seek64 */
};
253 
254 
255 		 /*******************************
256 		 *	 PROLOG CONNECTION	*
257 		 *******************************/
258 
/* Flags of the parent stream that multipart_open() passes on to the
   newly created multipart stream.
*/
#define COPY_FLAGS (SIO_INPUT|SIO_OUTPUT| \
		    SIO_TEXT| \
		    SIO_REPXML|SIO_REPPL|\
		    SIO_RECORDPOS)
263 
264 static foreign_t
multipart_open(term_t org,term_t new,term_t options)265 multipart_open(term_t org, term_t new, term_t options)
266 { term_t tail = PL_copy_term_ref(options);
267   term_t head = PL_new_term_ref();
268   multipart_context *ctx;
269   IOSTREAM *s, *s2;
270   int close_parent = FALSE;
271   char *boundary = NULL;
272   size_t boundary_len = 0;
273 
274   while(PL_get_list(tail, head, tail))
275   { atom_t name;
276     size_t arity;
277     term_t arg = PL_new_term_ref();
278 
279     if ( !PL_get_name_arity(head, &name, &arity) || arity != 1 )
280       return PL_type_error("option", head);
281     _PL_get_arg(1, head, arg);
282 
283     if ( name == ATOM_boundary )
284     { if ( !PL_get_nchars(arg, &boundary_len, &boundary,
285 			  CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION) )
286 	return FALSE;
287     } else if ( name == ATOM_close_parent )
288     { if ( !PL_get_bool_ex(arg, &close_parent) )
289 	return FALSE;
290     }
291   }
292   if ( !PL_get_nil_ex(tail) )
293     return FALSE;
294 
295   if ( !PL_get_stream_handle(org, &s) )
296     return FALSE;			/* Error */
297   if ( !(ctx = alloc_multipart_context(s)) )
298     return PL_resource_error("memory");
299   ctx->close_parent = close_parent;
300 
301   if ( boundary )
302   { if ( !(ctx->boundary = malloc(boundary_len*2+9)) )
303     { free_multipart_context(ctx);
304       return PL_resource_error("memory");
305     }
306     memcpy(ctx->boundary, "--", 2);
307     memcpy(ctx->boundary+2, boundary, boundary_len);
308     boundary_len += 2;
309     ctx->boundary_length = boundary_len;
310     ctx->boundary[boundary_len] = 0;
311     ctx->lookbehind = ctx->boundary+boundary_len+1;
312   }
313 
314   if ( !(s2 = Snew(ctx,
315 		   (s->flags&COPY_FLAGS)|SIO_FBUF,
316 		   &multipart_functions))	)
317   { free_multipart_context(ctx);	/* no memory */
318 
319     return FALSE;
320   }
321 
322   s2->encoding = s->encoding;
323   ctx->parent_encoding = s->encoding;
324   s->encoding = ENC_OCTET;
325   ctx->multipart_stream = s2;
326   if ( PL_unify_stream(new, s2) )
327   { Sset_filter(s, s2);
328     PL_release_stream(s);
329 
330     return TRUE;
331   } else if ( PL_exception(0) )
332   { return FALSE;
333   } else
334   { return PL_uninstantiation_error(new);
335   }
336 }
337 
338 
339 static foreign_t
multipart_open_next(term_t stream)340 multipart_open_next(term_t stream)
341 { IOSTREAM *s;
342   multipart_context *ctx;
343 
344   if ( !PL_get_stream_handle(stream, &s) )
345     return FALSE;
346   if ( s->functions != &multipart_functions )
347   { PL_release_stream(s);
348     return PL_type_error("multipart_stream", stream);
349   }
350   ctx = s->handle;
351 
352   switch ( ctx->state )
353   { case s_part_data_end:
354     { ctx->state = s_part_data_next;
355       Sclearerr(ctx->multipart_stream);
356       ctx->multipart_stream->encoding = ENC_OCTET;
357       return TRUE;
358     }
359     case s_end:
360       return FALSE;
361     default:
362       return PL_permission_error("open_next", "multi_part_stream", stream);
363   }
364 }
365 
366 
367 		 /*******************************
368 		 *	       INSTALL		*
369 		 *******************************/
370 
371 static void
install_multipart(void)372 install_multipart(void)
373 { ATOM_close_parent   = PL_new_atom("close_parent");
374   ATOM_boundary       = PL_new_atom("boundary");
375 
376   PL_register_foreign("multipart_open",      3, multipart_open,      0);
377   PL_register_foreign("multipart_open_next", 1, multipart_open_next, 0);
378 }
379 
380 
381 		 /*******************************
382 		 *				*
383 		 *******************************/
384 
385 /* Based on node-formidable by Felix Geisendörfer
386  * Igor Afonov - afonov@gmail.com - 2012
387  * MIT License - http://www.opensource.org/licenses/mit-license.php
388  */
389 
390 #define LF 10
391 #define CR 13
392 
/* Trace helper for the parser.  This is a no-op unless the file is
   compiled with DEBUG_MULTIPART defined.  In that case the formatted
   message is printed at debug level 2, prefixed with the location of
   this function.
*/
static void
multipart_log(const char *format, ...)
{
#ifdef DEBUG_MULTIPART
  va_list ap;

  va_start(ap, format);
  DEBUG(2,
	{ Sdprintf("[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
	  Svdprintf(format, ap);
	  Sdprintf("\n");
	});
  va_end(ap);
#else
  (void)format;				/* tracing disabled */
#endif
}
408 
409 
410 static size_t
multipart_parser_execute(multipart_context * p,const char * buf,size_t len,char ** out,size_t * out_lenp)411 multipart_parser_execute(multipart_context* p,
412 			 const char *buf, size_t len,
413 			 char **out, size_t *out_lenp)
414 { size_t i = 0;
415   size_t mark = 0;
416   char c;
417   int is_last = 0;
418 
419 #define	NOTIFY_CB(f) (void)0
420 #define EMIT_DATA_CB(part_data, data, len)		\
421 	do { size_t _len = (len);			\
422 	     char *_data = (char*)(data);		\
423 	     if	( _len > *out_lenp )			\
424 	     { p->unprocessed     = &_data[*out_lenp];	\
425 	       p->unprocessed_len = _len - *out_lenp;	\
426 	       _len = *out_lenp;			\
427 	     }						\
428 	     memcpy(*out, _data, _len);			\
429 	     (*out) += _len;				\
430 	     (*out_lenp) -= _len;			\
431 	} while(0)
432 
433   while(i < len && p->unprocessed_len == 0) {
434     c = buf[i];
435     is_last = (i == (len - 1));
436     switch (p->state) {
437       case s_start:
438 	multipart_log("s_start");
439         p->index = 0;
440         p->state = s_start_boundary;
441 
442       /* fallthrough */
443       case s_start_boundary:
444 	multipart_log("s_start_boundary");
445         if (p->index == p->boundary_length) {
446           if (c != CR) {
447             return i;
448           }
449           p->index++;
450           break;
451         } else if (p->index == (p->boundary_length + 1)) {
452           if (c != LF) {
453             return i;
454           }
455           p->index = 0;
456           NOTIFY_CB(part_data_begin);
457           p->state = s_part_data_start;
458           break;
459         }
460         if (c != p->boundary[p->index]) {
461            /* Skip anything before the first boundary
462               RFC-1341 refers to this as the 'preamble' and says we should ignore it */
463            p->index = -1;
464         }
465         p->index++;
466         break;
467 
468       case s_part_data_start:
469 	multipart_log("s_part_data_start at %ld", (long)i);
470         mark = i;
471         p->state = s_part_data;
472 
473       /* fallthrough */
474       case s_part_data:
475         multipart_log("s_part_data");
476         if (c == CR) {
477             EMIT_DATA_CB(part_data, buf + mark, i - mark);
478             mark = i;
479             p->state = s_part_data_almost_boundary;
480             p->lookbehind[0] = CR;
481             break;
482         }
483         if (is_last)
484             EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
485         break;
486 
487       case s_part_data_almost_boundary:
488         multipart_log("s_part_data_almost_boundary");
489         if (c == LF) {
490             p->state = s_part_data_boundary;
491             p->lookbehind[1] = LF;
492             p->index = 0;
493             break;
494         }
495         EMIT_DATA_CB(part_data, p->lookbehind, 1);
496         p->state = s_part_data;
497         mark = i --;
498         break;
499 
500       case s_part_data_boundary:
501         multipart_log("s_part_data_boundary");
502         if (p->boundary[p->index] != c) {
503           EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
504           p->state = s_part_data;
505           mark = i --;
506           break;
507         }
508         p->lookbehind[2 + p->index] = c;
509         if ((++ p->index) == p->boundary_length) {
510             NOTIFY_CB(part_data_end);
511             p->state = s_part_data_almost_end;
512         }
513         break;
514 
515       case s_part_data_almost_end:
516         multipart_log("s_part_data_almost_end");
517         if (c == '-') {
518             p->state = s_part_data_final_hyphen;
519             break;
520         }
521         if (c == CR) {
522             p->state = s_part_data_end;
523 	    i++;
524         }
525         return i;
526 
527       case s_part_data_final_hyphen:
528         multipart_log("s_part_data_final_hyphen");
529         if (c == '-') {
530             NOTIFY_CB(body_end);
531             p->state = s_end;
532             break;
533         }
534         return i;
535 
536       case s_part_data_next:
537         multipart_log("s_part_data_next");
538         if (c == LF) {
539             p->state = s_part_data_start;
540             NOTIFY_CB(part_data_begin);
541             break;
542         }
543         return i;
544 
545       case s_end:
546         multipart_log("s_end: %02X", (int) c);
547         break;
548 
549       default:
550         multipart_log("Multipart parser unrecoverable error");
551         return 0;
552     }
553     ++ i;
554   }
555 
556   return i;
557 }
558