1 /* Part of SWI-Prolog
2
3 Author: Jan Wielemaker
4 E-mail: J.Wielemaker@vu.nl
5 WWW: http://www.swi-prolog.org
6 Copyright (c) 2014-2016, University of Amsterdam
7 VU University Amsterdam
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13
14 1. Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in
19 the documentation and/or other materials provided with the
20 distribution.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include <SWI-Stream.h>
37 #include <SWI-Prolog.h>
38
39 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
40
41 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
42
43 #define O_DEBUG 1
44 #include <SWI-Stream.h>
45 #include <SWI-Prolog.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <assert.h>
49 #include <time.h>
50 #include <errno.h>
51
52 static atom_t ATOM_close_parent; /* close_parent(Bool) */
53 static atom_t ATOM_boundary; /* boundary(String) */
54
55 #ifndef DEBUG
56 static int multipart_debug = 0;
57 #define DEBUG(l, g) do { if (multipart_debug >= l) {g;} } while(0)
58 #endif
59
60
61 /*******************************
62 * TYPES *
63 *******************************/
64
#define BUFSIZE SIO_BUFSIZE             /* raw I/O buffer */

/* States of the multipart parser (see multipart_parser_execute()).
   The numeric values are written out because multipart_read() prints
   the state as an integer in its debug trace.
*/
typedef enum state
{ s_start                     =  0,     /* nothing seen yet */
  s_start_boundary            =  1,     /* locating first boundary + CR LF */
  s_part_data_start           =  2,     /* at the first byte of a part */
  s_part_data                 =  3,     /* inside the content of a part */
  s_part_data_almost_boundary =  4,     /* saw CR inside part content */
  s_part_data_boundary        =  5,     /* saw CR LF; matching the boundary */
  s_part_data_almost_end      =  6,     /* boundary matched; expect '-' or CR */
  s_part_data_end             =  7,     /* boundary + CR seen: part is complete */
  s_part_data_next            =  8,     /* expect LF before the next part */
  s_part_data_final_hyphen    =  9,     /* boundary followed by one '-' */
  s_end                       = 10      /* closing boundary seen */
} mp_state;
80
81 typedef struct multipart_context
82 { IOSTREAM *stream; /* Original stream */
83 IOSTREAM *multipart_stream; /* Stream I'm handle of */
84 int close_parent; /* close parent on close */
85 IOENC parent_encoding; /* Saved encoding of parent */
86 char *boundary; /* Our boundary */
87 size_t boundary_length; /* length of the boundary */
88 char *lookbehind; /* Collected part of potential boundary */
89 char *unprocessed; /* Could not emit this data now */
90 size_t unprocessed_len; /* length of unprocessed data */
91 size_t index;
92 mp_state state; /* current state */
93 } multipart_context;
94
95
96 static size_t
97 multipart_parser_execute(multipart_context* p,
98 const char *buf, size_t len,
99 char **out, size_t *out_lenp);
100
101 static multipart_context*
alloc_multipart_context(IOSTREAM * s)102 alloc_multipart_context(IOSTREAM *s)
103 { multipart_context *ctx = malloc(sizeof(*ctx));
104
105 if ( ctx )
106 { memset(ctx, 0, sizeof(*ctx));
107 ctx->stream = s;
108 }
109
110 return ctx;
111 }
112
113
114 static void
free_multipart_context(multipart_context * ctx)115 free_multipart_context(multipart_context *ctx)
116 { if ( ctx->stream->upstream )
117 Sset_filter(ctx->stream, NULL);
118 else
119 PL_release_stream(ctx->stream);
120
121 if ( ctx->boundary )
122 free(ctx->boundary);
123
124 free(ctx);
125 }
126
127
128 /*******************************
129 * MULTIPART I/O *
130 *******************************/
131
132 static ssize_t
multipart_read(void * handle,char * buf,size_t size)133 multipart_read(void *handle, char *buf, size_t size)
134 { multipart_context *ctx = handle;
135 IOSTREAM *in = ctx->stream;
136 char *out = buf;
137 size_t left = size;
138
139 if ( ctx->unprocessed_len )
140 { size_t len = ctx->unprocessed_len;
141
142 DEBUG(1, Sdprintf("Unprocessed: %ld\n", (long)len));
143
144 if ( len > size )
145 len = size;
146 memcpy(buf, ctx->unprocessed, len);
147 ctx->unprocessed_len -= len;
148 ctx->unprocessed += len;
149
150 return len;
151 }
152
153 if ( ctx->state == s_end )
154 { return 0;
155 } else if ( ctx->state == s_part_data_end )
156 { return 0;
157 }
158
159 for(;;)
160 { if ( in->bufp >= in->limitp )
161 { if ( S__fillbuf(in) == EOF )
162 { Sseterr(in, SIO_FERR, "Incomplete multipart/form-data");
163 return -1;
164 }
165 in->bufp--;
166 }
167
168 do
169 { size_t processed;
170
171 #if defined(O_DEBUG) && defined(DEBUG_MULTIPART)
172 char tmp[10000];
173 memcpy(tmp, in->bufp, in->limitp-in->bufp);
174 tmp[in->limitp-in->bufp] = 0;
175
176 DEBUG(1, Sdprintf("multipart_parser_execute(%ld bytes: \"%s\")\n",
177 (long)(in->limitp-in->bufp), tmp));
178 #endif
179
180 processed = multipart_parser_execute(ctx,
181 in->bufp, in->limitp-in->bufp,
182 &out, &left);
183 DEBUG(1, Sdprintf("processed %ld bytes, state=%d, left=%ld\n",
184 (long)processed, ctx->state, (long)left));
185
186 if ( processed == 0 )
187 { Sseterr(in, SIO_FERR, "Invalid multipart/form-data");
188 return -1;
189 }
190
191 in->bufp += processed;
192 } while( left > 0 &&
193 in->bufp < in->limitp &&
194 ctx->state != s_end &&
195 ctx->state != s_part_data_end );
196
197 if ( out > buf || ctx->state == s_end || ctx->state == s_part_data_end )
198 { DEBUG(1, Sdprintf("Reply %ld bytes\n", (long)(out-buf)));
199 return out-buf;
200 }
201 }
202 }
203
204
205 static int
multipart_control(void * handle,int op,void * data)206 multipart_control(void *handle, int op, void *data)
207 { multipart_context *ctx = handle;
208
209 switch(op)
210 { case SIO_SETENCODING:
211 return 0; /* allow switching encoding */
212 default:
213 if ( ctx->stream->functions->control )
214 return (*ctx->stream->functions->control)(ctx->stream->handle, op, data);
215 return -1;
216 }
217 }
218
219
220 static int
multipart_close(void * handle)221 multipart_close(void *handle)
222 { multipart_context *ctx = handle;
223 int rc = 0;
224
225 DEBUG(1, Sdprintf("multipart_close() ...\n"));
226
227 ctx->stream->encoding = ctx->parent_encoding;
228
229 if ( ctx->close_parent )
230 { IOSTREAM *parent = ctx->stream;
231 int rc2;
232
233 free_multipart_context(ctx);
234 rc2 = Sclose(parent);
235 if ( rc == 0 )
236 rc = rc2;
237 } else
238 { free_multipart_context(ctx);
239 }
240
241 return rc;
242 }
243
244
245 static IOFUNCTIONS multipart_functions =
246 { multipart_read,
247 NULL,
248 NULL, /* seek */
249 multipart_close,
250 multipart_control, /* zcontrol */
251 NULL, /* seek64 */
252 };
253
254
255 /*******************************
256 * PROLOG CONNECTION *
257 *******************************/
258
259 #define COPY_FLAGS (SIO_INPUT|SIO_OUTPUT| \
260 SIO_TEXT| \
261 SIO_REPXML|SIO_REPPL|\
262 SIO_RECORDPOS)
263
264 static foreign_t
multipart_open(term_t org,term_t new,term_t options)265 multipart_open(term_t org, term_t new, term_t options)
266 { term_t tail = PL_copy_term_ref(options);
267 term_t head = PL_new_term_ref();
268 multipart_context *ctx;
269 IOSTREAM *s, *s2;
270 int close_parent = FALSE;
271 char *boundary = NULL;
272 size_t boundary_len = 0;
273
274 while(PL_get_list(tail, head, tail))
275 { atom_t name;
276 size_t arity;
277 term_t arg = PL_new_term_ref();
278
279 if ( !PL_get_name_arity(head, &name, &arity) || arity != 1 )
280 return PL_type_error("option", head);
281 _PL_get_arg(1, head, arg);
282
283 if ( name == ATOM_boundary )
284 { if ( !PL_get_nchars(arg, &boundary_len, &boundary,
285 CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION) )
286 return FALSE;
287 } else if ( name == ATOM_close_parent )
288 { if ( !PL_get_bool_ex(arg, &close_parent) )
289 return FALSE;
290 }
291 }
292 if ( !PL_get_nil_ex(tail) )
293 return FALSE;
294
295 if ( !PL_get_stream_handle(org, &s) )
296 return FALSE; /* Error */
297 if ( !(ctx = alloc_multipart_context(s)) )
298 return PL_resource_error("memory");
299 ctx->close_parent = close_parent;
300
301 if ( boundary )
302 { if ( !(ctx->boundary = malloc(boundary_len*2+9)) )
303 { free_multipart_context(ctx);
304 return PL_resource_error("memory");
305 }
306 memcpy(ctx->boundary, "--", 2);
307 memcpy(ctx->boundary+2, boundary, boundary_len);
308 boundary_len += 2;
309 ctx->boundary_length = boundary_len;
310 ctx->boundary[boundary_len] = 0;
311 ctx->lookbehind = ctx->boundary+boundary_len+1;
312 }
313
314 if ( !(s2 = Snew(ctx,
315 (s->flags©_FLAGS)|SIO_FBUF,
316 &multipart_functions)) )
317 { free_multipart_context(ctx); /* no memory */
318
319 return FALSE;
320 }
321
322 s2->encoding = s->encoding;
323 ctx->parent_encoding = s->encoding;
324 s->encoding = ENC_OCTET;
325 ctx->multipart_stream = s2;
326 if ( PL_unify_stream(new, s2) )
327 { Sset_filter(s, s2);
328 PL_release_stream(s);
329
330 return TRUE;
331 } else if ( PL_exception(0) )
332 { return FALSE;
333 } else
334 { return PL_uninstantiation_error(new);
335 }
336 }
337
338
339 static foreign_t
multipart_open_next(term_t stream)340 multipart_open_next(term_t stream)
341 { IOSTREAM *s;
342 multipart_context *ctx;
343
344 if ( !PL_get_stream_handle(stream, &s) )
345 return FALSE;
346 if ( s->functions != &multipart_functions )
347 { PL_release_stream(s);
348 return PL_type_error("multipart_stream", stream);
349 }
350 ctx = s->handle;
351
352 switch ( ctx->state )
353 { case s_part_data_end:
354 { ctx->state = s_part_data_next;
355 Sclearerr(ctx->multipart_stream);
356 ctx->multipart_stream->encoding = ENC_OCTET;
357 return TRUE;
358 }
359 case s_end:
360 return FALSE;
361 default:
362 return PL_permission_error("open_next", "multi_part_stream", stream);
363 }
364 }
365
366
367 /*******************************
368 * INSTALL *
369 *******************************/
370
371 static void
install_multipart(void)372 install_multipart(void)
373 { ATOM_close_parent = PL_new_atom("close_parent");
374 ATOM_boundary = PL_new_atom("boundary");
375
376 PL_register_foreign("multipart_open", 3, multipart_open, 0);
377 PL_register_foreign("multipart_open_next", 1, multipart_open_next, 0);
378 }
379
380
                 /*******************************
                 *       MULTIPART PARSER       *
                 *******************************/
384
385 /* Based on node-formidable by Felix Geisendörfer
386 * Igor Afonov - afonov@gmail.com - 2012
387 * MIT License - http://www.opensource.org/licenses/mit-license.php
388 */
389
#define LF 10                           /* line feed */
#define CR 13                           /* carriage return */

/* Trace helper for the parser.  Compiles to an empty function unless
   DEBUG_MULTIPART is defined, in which case the printf-style message is
   printed at debug level 2 with a file/line prefix.
*/
static void
multipart_log(const char *format, ...)
{
#ifdef DEBUG_MULTIPART
  va_list args;

  va_start(args, format);
  DEBUG(2,
        { Sdprintf("[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
          Svdprintf(format, args);
          Sdprintf("\n");
        });
  va_end(args);
#else
  (void)format;                         /* tracing disabled */
#endif
}
408
409
410 static size_t
multipart_parser_execute(multipart_context * p,const char * buf,size_t len,char ** out,size_t * out_lenp)411 multipart_parser_execute(multipart_context* p,
412 const char *buf, size_t len,
413 char **out, size_t *out_lenp)
414 { size_t i = 0;
415 size_t mark = 0;
416 char c;
417 int is_last = 0;
418
419 #define NOTIFY_CB(f) (void)0
420 #define EMIT_DATA_CB(part_data, data, len) \
421 do { size_t _len = (len); \
422 char *_data = (char*)(data); \
423 if ( _len > *out_lenp ) \
424 { p->unprocessed = &_data[*out_lenp]; \
425 p->unprocessed_len = _len - *out_lenp; \
426 _len = *out_lenp; \
427 } \
428 memcpy(*out, _data, _len); \
429 (*out) += _len; \
430 (*out_lenp) -= _len; \
431 } while(0)
432
433 while(i < len && p->unprocessed_len == 0) {
434 c = buf[i];
435 is_last = (i == (len - 1));
436 switch (p->state) {
437 case s_start:
438 multipart_log("s_start");
439 p->index = 0;
440 p->state = s_start_boundary;
441
442 /* fallthrough */
443 case s_start_boundary:
444 multipart_log("s_start_boundary");
445 if (p->index == p->boundary_length) {
446 if (c != CR) {
447 return i;
448 }
449 p->index++;
450 break;
451 } else if (p->index == (p->boundary_length + 1)) {
452 if (c != LF) {
453 return i;
454 }
455 p->index = 0;
456 NOTIFY_CB(part_data_begin);
457 p->state = s_part_data_start;
458 break;
459 }
460 if (c != p->boundary[p->index]) {
461 /* Skip anything before the first boundary
462 RFC-1341 refers to this as the 'preamble' and says we should ignore it */
463 p->index = -1;
464 }
465 p->index++;
466 break;
467
468 case s_part_data_start:
469 multipart_log("s_part_data_start at %ld", (long)i);
470 mark = i;
471 p->state = s_part_data;
472
473 /* fallthrough */
474 case s_part_data:
475 multipart_log("s_part_data");
476 if (c == CR) {
477 EMIT_DATA_CB(part_data, buf + mark, i - mark);
478 mark = i;
479 p->state = s_part_data_almost_boundary;
480 p->lookbehind[0] = CR;
481 break;
482 }
483 if (is_last)
484 EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
485 break;
486
487 case s_part_data_almost_boundary:
488 multipart_log("s_part_data_almost_boundary");
489 if (c == LF) {
490 p->state = s_part_data_boundary;
491 p->lookbehind[1] = LF;
492 p->index = 0;
493 break;
494 }
495 EMIT_DATA_CB(part_data, p->lookbehind, 1);
496 p->state = s_part_data;
497 mark = i --;
498 break;
499
500 case s_part_data_boundary:
501 multipart_log("s_part_data_boundary");
502 if (p->boundary[p->index] != c) {
503 EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
504 p->state = s_part_data;
505 mark = i --;
506 break;
507 }
508 p->lookbehind[2 + p->index] = c;
509 if ((++ p->index) == p->boundary_length) {
510 NOTIFY_CB(part_data_end);
511 p->state = s_part_data_almost_end;
512 }
513 break;
514
515 case s_part_data_almost_end:
516 multipart_log("s_part_data_almost_end");
517 if (c == '-') {
518 p->state = s_part_data_final_hyphen;
519 break;
520 }
521 if (c == CR) {
522 p->state = s_part_data_end;
523 i++;
524 }
525 return i;
526
527 case s_part_data_final_hyphen:
528 multipart_log("s_part_data_final_hyphen");
529 if (c == '-') {
530 NOTIFY_CB(body_end);
531 p->state = s_end;
532 break;
533 }
534 return i;
535
536 case s_part_data_next:
537 multipart_log("s_part_data_next");
538 if (c == LF) {
539 p->state = s_part_data_start;
540 NOTIFY_CB(part_data_begin);
541 break;
542 }
543 return i;
544
545 case s_end:
546 multipart_log("s_end: %02X", (int) c);
547 break;
548
549 default:
550 multipart_log("Multipart parser unrecoverable error");
551 return 0;
552 }
553 ++ i;
554 }
555
556 return i;
557 }
558