1 /*
2  * This file Copyright (C) 2008-2014 Mnemosyne LLC
3  *
4  * It may be used under the GNU GPL versions 2 or 3
5  * or any future license endorsed by Mnemosyne LLC.
6  *
7  */
8 
#include <ctype.h>
#include <errno.h> /* EILSEQ, EINVAL, ENOMEM */
#include <limits.h> /* INT_MIN, INT_MAX */
#include <math.h> /* fabs(), trunc() */
#include <stdio.h>
#include <string.h>
14 
15 #include <event2/buffer.h> /* evbuffer_add() */
16 #include <event2/util.h> /* evutil_strtoll() */
17 
18 #define JSONSL_STATE_USER_FIELDS /* no fields */
19 #include "jsonsl.h"
20 #include "jsonsl.c"
21 
22 #define __LIBTRANSMISSION_VARIANT_MODULE__
23 
24 #include "transmission.h"
25 #include "ConvertUTF.h"
26 #include "list.h"
27 #include "log.h"
28 #include "ptrarray.h"
29 #include "tr-assert.h"
30 #include "utils.h"
31 #include "variant.h"
32 #include "variant-common.h"
33 
34 /* arbitrary value... this is much deeper than our code goes */
35 #define MAX_DEPTH 64
36 
/* Parser state shared between tr_jsonParse() and the jsonsl callbacks
 * while a single document is being parsed. */
struct json_wrapper_data
{
    int error; /* 0, EILSEQ after a jsonsl error, or EINVAL when no content was seen */
    bool has_content; /* set once any key, value or container has been encountered */
    tr_variant* top; /* caller-supplied variant that receives the document root */
    char const* key; /* pending dict key, consumed by get_node(); NULL if none */
    size_t keylen; /* length of `key' in bytes */
    struct evbuffer* keybuf; /* scratch space for unescaping dict keys */
    struct evbuffer* strbuf; /* scratch space for unescaping string values */
    char const* source; /* name used in log messages; may be NULL */
    tr_ptrArray stack; /* containers currently open, innermost at the back */

    /* A very common pattern is for a container's children to be similar,
     * e.g. they may all be objects with the same set of keys. So when
     * a container is popped off the stack, remember its size to use as
     * a preallocation heuristic for the next container at that depth. */
    size_t preallocGuess[MAX_DEPTH];
};
55 
get_node(struct jsonsl_st * jsn)56 static tr_variant* get_node(struct jsonsl_st* jsn)
57 {
58     tr_variant* parent;
59     tr_variant* node = NULL;
60     struct json_wrapper_data* data = jsn->data;
61 
62     parent = tr_ptrArrayEmpty(&data->stack) ? NULL : tr_ptrArrayBack(&data->stack);
63 
64     if (parent == NULL)
65     {
66         node = data->top;
67     }
68     else if (tr_variantIsList(parent))
69     {
70         node = tr_variantListAdd(parent);
71     }
72     else if (tr_variantIsDict(parent) && data->key != NULL)
73     {
74         node = tr_variantDictAdd(parent, tr_quark_new(data->key, data->keylen));
75 
76         data->key = NULL;
77         data->keylen = 0;
78     }
79 
80     return node;
81 }
82 
error_handler(jsonsl_t jsn,jsonsl_error_t error,struct jsonsl_state_st * state UNUSED,jsonsl_char_t const * buf)83 static void error_handler(jsonsl_t jsn, jsonsl_error_t error, struct jsonsl_state_st* state UNUSED, jsonsl_char_t const* buf)
84 {
85     struct json_wrapper_data* data = jsn->data;
86 
87     if (data->source != NULL)
88     {
89         tr_logAddError("JSON parse failed in %s at pos %zu: %s -- remaining text \"%.16s\"", data->source, jsn->pos,
90             jsonsl_strerror(error), buf);
91     }
92     else
93     {
94         tr_logAddError("JSON parse failed at pos %zu: %s -- remaining text \"%.16s\"", jsn->pos, jsonsl_strerror(error), buf);
95     }
96 
97     data->error = EILSEQ;
98 }
99 
error_callback(jsonsl_t jsn,jsonsl_error_t error,struct jsonsl_state_st * state,jsonsl_char_t * at)100 static int error_callback(jsonsl_t jsn, jsonsl_error_t error, struct jsonsl_state_st* state, jsonsl_char_t* at)
101 {
102     error_handler(jsn, error, state, at);
103     return 0; /* bail */
104 }
105 
/* jsonsl PUSH hook. Only lists and objects are handled here: the new
 * container is attached to its parent, pushed onto the stack of open
 * containers and initialised with the size remembered from the previous
 * container seen at the same depth. */
static void action_callback_PUSH(jsonsl_t jsn, jsonsl_action_t action UNUSED, struct jsonsl_state_st* state,
    jsonsl_char_t const* buf UNUSED)
{
    bool const is_list = state->type == JSONSL_T_LIST;
    bool const is_dict = state->type == JSONSL_T_OBJECT;

    if (!is_list && !is_dict)
    {
        return;
    }

    struct json_wrapper_data* const data = jsn->data;
    data->has_content = true;

    tr_variant* const container = get_node(jsn);
    tr_ptrArrayAppend(&data->stack, container);

    /* depth is measured after the push, matching action_callback_POP() */
    int const depth = tr_ptrArraySize(&data->stack);
    size_t reserve = 0;

    if (depth < MAX_DEPTH)
    {
        reserve = data->preallocGuess[depth];
    }

    if (is_list)
    {
        tr_variantInitList(container, reserve);
    }
    else
    {
        tr_variantInitDict(container, reserve);
    }
}
130 
/* Decodes the four hex digits of a "\uXXXX" escape; like
 * sscanf(in+2, "%4x", &val) but less slow.
 *
 * `in' must point at the backslash and have at least six readable bytes.
 * On success the value is stored in `*setme' and true is returned; if any
 * of the four digits is not hexadecimal, false is returned and `*setme'
 * is left untouched. */
static bool decode_hex_string(char const* in, unsigned int* setme)
{
    TR_ASSERT(in != NULL);
    TR_ASSERT(in[0] == '\\');
    TR_ASSERT(in[1] == 'u');

    unsigned int code = 0;

    for (int i = 2; i < 6; ++i)
    {
        char const ch = in[i];
        unsigned int digit;

        if (ch >= '0' && ch <= '9')
        {
            digit = ch - '0';
        }
        else if (ch >= 'a' && ch <= 'f')
        {
            digit = ch - 'a' + 10U;
        }
        else if (ch >= 'A' && ch <= 'F')
        {
            digit = ch - 'A' + 10U;
        }
        else
        {
            return false;
        }

        code = (code << 4) + digit;
    }

    *setme = code;
    return true;
}
169 
extract_escaped_string(char const * in,size_t in_len,size_t * len,struct evbuffer * buf)170 static char* extract_escaped_string(char const* in, size_t in_len, size_t* len, struct evbuffer* buf)
171 {
172     char const* const in_end = in + in_len;
173 
174     evbuffer_drain(buf, evbuffer_get_length(buf));
175 
176     while (in < in_end)
177     {
178         bool unescaped = false;
179 
180         if (*in == '\\' && in_end - in >= 2)
181         {
182             switch (in[1])
183             {
184             case 'b':
185                 evbuffer_add(buf, "\b", 1);
186                 in += 2;
187                 unescaped = true;
188                 break;
189 
190             case 'f':
191                 evbuffer_add(buf, "\f", 1);
192                 in += 2;
193                 unescaped = true;
194                 break;
195 
196             case 'n':
197                 evbuffer_add(buf, "\n", 1);
198                 in += 2;
199                 unescaped = true;
200                 break;
201 
202             case 'r':
203                 evbuffer_add(buf, "\r", 1);
204                 in += 2;
205                 unescaped = true;
206                 break;
207 
208             case 't':
209                 evbuffer_add(buf, "\t", 1);
210                 in += 2;
211                 unescaped = true;
212                 break;
213 
214             case '/':
215                 evbuffer_add(buf, "/", 1);
216                 in += 2;
217                 unescaped = true;
218                 break;
219 
220             case '"':
221                 evbuffer_add(buf, "\"", 1);
222                 in += 2;
223                 unescaped = true;
224                 break;
225 
226             case '\\':
227                 evbuffer_add(buf, "\\", 1);
228                 in += 2;
229                 unescaped = true;
230                 break;
231 
232             case 'u':
233                 {
234                     if (in_end - in >= 6)
235                     {
236                         unsigned int val = 0;
237 
238                         if (decode_hex_string(in, &val))
239                         {
240                             UTF32 str32_buf[2] = { val, 0 };
241                             UTF32 const* str32_walk = str32_buf;
242                             UTF32 const* str32_end = str32_buf + 1;
243                             UTF8 str8_buf[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
244                             UTF8* str8_walk = str8_buf;
245                             UTF8* str8_end = str8_buf + 8;
246 
247                             if (ConvertUTF32toUTF8(&str32_walk, str32_end, &str8_walk, str8_end, 0) == 0)
248                             {
249                                 size_t const len = str8_walk - str8_buf;
250                                 evbuffer_add(buf, str8_buf, len);
251                                 unescaped = true;
252                             }
253 
254                             in += 6;
255                             break;
256                         }
257                     }
258                 }
259             }
260         }
261 
262         if (!unescaped)
263         {
264             evbuffer_add(buf, in, 1);
265             ++in;
266         }
267     }
268 
269     *len = evbuffer_get_length(buf);
270     return (char*)evbuffer_pullup(buf, -1);
271 }
272 
/* Locates the string that `state' describes inside the parser's input and
 * returns a pointer to its contents, storing the byte length in `*len'.
 *
 * A leading double quote is skipped. If the text contains no backslash the
 * returned pointer refers directly into the input; otherwise the text is
 * unescaped into `buf' and the pointer refers into `buf'. */
static char const* extract_string(jsonsl_t jsn, struct jsonsl_state_st* state, size_t* len, struct evbuffer* buf)
{
    char const* first = jsn->base + state->pos_begin;

    if (*first == '"')
    {
        ++first;
    }

    char const* const last = jsn->base + state->pos_cur;
    size_t const span = last - first;

    if (memchr(first, '\\', span) != NULL)
    {
        return extract_escaped_string(first, span, len, buf);
    }

    /* nothing to unescape, so the input can be used as-is */
    *len = span;
    return first;
}
304 
/* jsonsl POP hook, run when an element has been read in full.
 *
 *  - string:      stored as a string variant
 *  - object key:  remembered as the pending key for the next value
 *  - list/object: popped from the open-container stack; its child count is
 *                 remembered as the preallocation guess for that depth
 *  - special:     stored as a real, int or bool, or as the TR_KEY_NONE quark
 *                 for null, depending on the special flags */
static void action_callback_POP(jsonsl_t jsn, jsonsl_action_t action UNUSED, struct jsonsl_state_st* state,
    jsonsl_char_t const* buf UNUSED)
{
    struct json_wrapper_data* const data = jsn->data;

    switch (state->type)
    {
    case JSONSL_T_STRING:
        {
            size_t n;
            char const* const text = extract_string(jsn, state, &n, data->strbuf);
            tr_variantInitStr(get_node(jsn), text, n);
            data->has_content = true;
            break;
        }

    case JSONSL_T_HKEY:
        data->has_content = true;
        data->key = extract_string(jsn, state, &data->keylen, data->keybuf);
        break;

    case JSONSL_T_LIST:
    case JSONSL_T_OBJECT:
        {
            /* depth is measured before the pop, matching action_callback_PUSH() */
            int const depth = tr_ptrArraySize(&data->stack);
            tr_variant const* const closed = tr_ptrArrayPop(&data->stack);

            if (depth < MAX_DEPTH)
            {
                data->preallocGuess[depth] = closed->val.l.count;
            }

            break;
        }

    case JSONSL_T_SPECIAL:
        {
            char const* const text = jsn->base + state->pos_begin;

            if ((state->special_flags & JSONSL_SPECIALf_NUMNOINT) != 0)
            {
                data->has_content = true;
                tr_variantInitReal(get_node(jsn), strtod(text, NULL));
            }
            else if ((state->special_flags & JSONSL_SPECIALf_NUMERIC) != 0)
            {
                data->has_content = true;
                tr_variantInitInt(get_node(jsn), evutil_strtoll(text, NULL, 10));
            }
            else if ((state->special_flags & JSONSL_SPECIALf_BOOLEAN) != 0)
            {
                data->has_content = true;
                tr_variantInitBool(get_node(jsn), (state->special_flags & JSONSL_SPECIALf_TRUE) != 0);
            }
            else if ((state->special_flags & JSONSL_SPECIALf_NULL) != 0)
            {
                data->has_content = true;
                tr_variantInitQuark(get_node(jsn), TR_KEY_NONE);
            }

            break;
        }

    default:
        break;
    }
}
358 
/* Parses `len' bytes of JSON at `vbuf' into `setme_variant'.
 *
 * @param source        name used in log messages when parsing fails; may be NULL
 * @param vbuf          the JSON text
 * @param len           number of bytes at `vbuf'
 * @param setme_variant receives the root of the parsed document
 * @param setme_end     if not NULL, receives a pointer to the position in
 *                      `vbuf' at which the parser stopped
 *
 * @return 0 on success, EILSEQ if the parser reported an error, EINVAL if
 *         the input held no content, or ENOMEM if the parser or its scratch
 *         buffers could not be allocated (in which case nothing is parsed
 *         and `*setme_end' is set to the start of `vbuf'). */
int tr_jsonParse(char const* source, void const* vbuf, size_t len, tr_variant* setme_variant, char const** setme_end)
{
    int error;
    struct json_wrapper_data data;

    /* NOTE(review): assumes jsonsl_new() signals failure by returning NULL -- confirm */
    jsonsl_t jsn = jsonsl_new(MAX_DEPTH);
    struct evbuffer* keybuf = evbuffer_new();
    struct evbuffer* strbuf = evbuffer_new();

    if (jsn == NULL || keybuf == NULL || strbuf == NULL)
    {
        if (strbuf != NULL)
        {
            evbuffer_free(strbuf);
        }

        if (keybuf != NULL)
        {
            evbuffer_free(keybuf);
        }

        if (jsn != NULL)
        {
            jsonsl_destroy(jsn);
        }

        if (setme_end != NULL)
        {
            *setme_end = vbuf;
        }

        return ENOMEM;
    }

    data.error = 0;
    data.has_content = false;
    data.top = setme_variant;
    data.key = NULL;
    data.keylen = 0;
    data.keybuf = keybuf;
    data.strbuf = strbuf;
    data.source = source;
    data.stack = TR_PTR_ARRAY_INIT;
    memset(data.preallocGuess, 0, sizeof(data.preallocGuess));

    jsn->action_callback_PUSH = action_callback_PUSH;
    jsn->action_callback_POP = action_callback_POP;
    jsn->error_callback = error_callback;
    jsn->data = &data;
    jsonsl_enable_all_callbacks(jsn);

    /* parse it */
    jsonsl_feed(jsn, vbuf, len);

    /* EINVAL if there was no content */
    if (data.error == 0 && !data.has_content)
    {
        data.error = EINVAL;
    }

    /* maybe set the end ptr */
    if (setme_end != NULL)
    {
        *setme_end = ((char const*)vbuf) + jsn->pos;
    }

    /* cleanup */
    error = data.error;
    evbuffer_free(data.keybuf);
    evbuffer_free(data.strbuf);
    tr_ptrArrayDestruct(&data.stack, NULL);
    jsonsl_destroy(jsn);
    return error;
}
408 
409 /****
410 *****
411 ****/
412 
/* Bookkeeping for one container that is currently being serialized. */
struct ParentState
{
    int variantType; /* the container's variant type, copied from its `type' field */
    int childIndex; /* how many children have been written so far */
    int childCount; /* total children; for dicts, keys and values are counted separately */
};
419 
/* State carried through tr_variantWalk() while serializing to JSON. */
struct jsonWalk
{
    bool doIndent; /* true to emit newlines and indentation between elements */
    tr_list* parents; /* ParentState of each open container, innermost first */
    struct evbuffer* out; /* where the JSON text is appended */
};
426 
jsonIndent(struct jsonWalk * data)427 static void jsonIndent(struct jsonWalk* data)
428 {
429     static char buf[1024] = { '\0' };
430 
431     if (*buf == '\0')
432     {
433         memset(buf, ' ', sizeof(buf));
434         buf[0] = '\n';
435     }
436 
437     if (data->doIndent)
438     {
439         evbuffer_add(data->out, buf, tr_list_size(data->parents) * 4 + 1);
440     }
441 }
442 
jsonChildFunc(struct jsonWalk * data)443 static void jsonChildFunc(struct jsonWalk* data)
444 {
445     if (data->parents != NULL && data->parents->data != NULL)
446     {
447         struct ParentState* pstate = data->parents->data;
448 
449         switch (pstate->variantType)
450         {
451         case TR_VARIANT_TYPE_DICT:
452             {
453                 int const i = pstate->childIndex;
454                 ++pstate->childIndex;
455 
456                 if (i % 2 == 0)
457                 {
458                     evbuffer_add(data->out, ": ", data->doIndent ? 2 : 1);
459                 }
460                 else
461                 {
462                     bool const isLast = pstate->childIndex == pstate->childCount;
463 
464                     if (!isLast)
465                     {
466                         evbuffer_add(data->out, ",", 1);
467                         jsonIndent(data);
468                     }
469                 }
470 
471                 break;
472             }
473 
474         case TR_VARIANT_TYPE_LIST:
475             {
476                 ++pstate->childIndex;
477                 bool const isLast = pstate->childIndex == pstate->childCount;
478 
479                 if (!isLast)
480                 {
481                     evbuffer_add(data->out, ",", 1);
482                     jsonIndent(data);
483                 }
484 
485                 break;
486             }
487 
488         default:
489             break;
490         }
491     }
492 }
493 
/* Records `v' as the innermost open container. A dict's child count is
 * doubled because its keys and values are counted separately. */
static void jsonPushParent(struct jsonWalk* data, tr_variant const* v)
{
    int children = v->val.l.count;

    if (tr_variantIsDict(v))
    {
        children *= 2;
    }

    struct ParentState* const entry = tr_new(struct ParentState, 1);
    entry->variantType = v->type;
    entry->childIndex = 0;
    entry->childCount = children;

    tr_list_prepend(&data->parents, entry);
}
509 
jsonPopParent(struct jsonWalk * data)510 static void jsonPopParent(struct jsonWalk* data)
511 {
512     tr_free(tr_list_pop_front(&data->parents));
513 }
514 
/* Walk callback: writes an integer variant in decimal. */
static void jsonIntFunc(tr_variant const* val, void* vdata)
{
    struct jsonWalk* const walk = vdata;

    evbuffer_add_printf(walk->out, "%" PRId64, val->val.i);

    jsonChildFunc(walk);
}
521 
/* Walk callback: writes a boolean variant as `true' or `false'. */
static void jsonBoolFunc(tr_variant const* val, void* vdata)
{
    struct jsonWalk* const walk = vdata;
    char const* const text = val->val.b ? "true" : "false";

    evbuffer_add(walk->out, text, strlen(text));

    jsonChildFunc(walk);
}
537 
/* Walk callback: writes a real variant.
 *
 * A value within 0.00001 of its integer part is written as that integer,
 * provided the integer part fits in an int; everything else is written with
 * four decimal places after being truncated to four places.
 *
 * The range is checked before converting to int because converting an
 * out-of-range or non-finite double to int is undefined behaviour. Such
 * values take the four-decimal path. */
static void jsonRealFunc(tr_variant const* val, void* vdata)
{
    struct jsonWalk* data = vdata;
    double const d = val->val.d;
    double const whole = trunc(d);

    /* every comparison is false for NaN, so NaN falls through to the else */
    if (whole >= INT_MIN && whole <= INT_MAX && fabs(d - whole) < 0.00001)
    {
        evbuffer_add_printf(data->out, "%d", (int)whole);
    }
    else
    {
        evbuffer_add_printf(data->out, "%.4f", tr_truncd(d, 4));
    }

    jsonChildFunc(data);
}
553 
/* Walk callback: writes a string variant as a quoted JSON string.
 *
 *  - \b \f \n \r \t " and \ are written as two-character escapes
 *  - bytes for which isprint() is true are copied unchanged
 *  - anything else is decoded as UTF-8 and written as a \uXXXX escape;
 *    code points above U+FFFF are written as a UTF-16 surrogate pair, as
 *    JSON requires
 *  - bytes that cannot be decoded as UTF-8 are left out of the output
 *
 * If output space cannot be reserved, nothing is written for the string. */
static void jsonStringFunc(tr_variant const* val, void* vdata)
{
    struct jsonWalk* data = vdata;
    struct evbuffer_iovec vec[1];
    char const* str = NULL;
    size_t len = 0;

    tr_variantGetStr(val, &str, &len);

    unsigned char const* it = (unsigned char const*)str;
    unsigned char const* const end = it + len;

    /* Worst case is six output bytes per input byte ("\u00XX"), plus the two
     * quotes, plus the terminator that tr_snprintf() writes after an escape. */
    size_t const needed = len * 6 + 3;

    if (evbuffer_reserve_space(data->out, needed, vec, 1) < 0)
    {
        jsonChildFunc(data);
        return;
    }

    char* const out = vec[0].iov_base;
    char* const outend = out + vec[0].iov_len;
    char* outwalk = out;

    *outwalk++ = '"';

    for (; it != end; ++it)
    {
        switch (*it)
        {
        case '\b':
            *outwalk++ = '\\';
            *outwalk++ = 'b';
            break;

        case '\f':
            *outwalk++ = '\\';
            *outwalk++ = 'f';
            break;

        case '\n':
            *outwalk++ = '\\';
            *outwalk++ = 'n';
            break;

        case '\r':
            *outwalk++ = '\\';
            *outwalk++ = 'r';
            break;

        case '\t':
            *outwalk++ = '\\';
            *outwalk++ = 't';
            break;

        case '"':
            *outwalk++ = '\\';
            *outwalk++ = '"';
            break;

        case '\\':
            *outwalk++ = '\\';
            *outwalk++ = '\\';
            break;

        default:
            if (isprint(*it))
            {
                *outwalk++ = *it;
            }
            else
            {
                UTF8 const* tmp = it;
                UTF32 buf[1] = { 0 };
                UTF32* u32 = buf;
                ConversionResult result = ConvertUTF8toUTF32(&tmp, end, &u32, buf + 1, 0);

                if ((result == conversionOK || result == targetExhausted) && tmp != it)
                {
                    unsigned int const cp = (unsigned int)buf[0];

                    if (cp > 0xFFFF)
                    {
                        /* outside the BMP: split into high and low surrogates */
                        unsigned int const offset = cp - 0x10000;
                        unsigned int const high = 0xD800 + (offset >> 10);
                        unsigned int const low = 0xDC00 + (offset & 0x3FF);
                        outwalk += tr_snprintf(outwalk, outend - outwalk, "\\u%04x\\u%04x", high, low);
                    }
                    else
                    {
                        outwalk += tr_snprintf(outwalk, outend - outwalk, "\\u%04x", cp);
                    }

                    /* resume after the bytes just decoded; the loop adds one */
                    it = tmp - 1;
                }
            }

            break;
        }
    }

    *outwalk++ = '"';
    vec[0].iov_len = outwalk - out;
    evbuffer_commit_space(data->out, vec, 1);

    jsonChildFunc(data);
}
645 
/* Walk callback: opens a dict. Writes '{' and, unless the dict is empty,
 * the indentation for its first key. */
static void jsonDictBeginFunc(tr_variant const* val, void* vdata)
{
    struct jsonWalk* const walk = vdata;
    bool const has_children = val->val.l.count != 0;

    jsonPushParent(walk, val);
    evbuffer_add(walk->out, "{", 1);

    if (has_children)
    {
        jsonIndent(walk);
    }
}
658 
/* Walk callback: opens a list. Writes '[' and, unless the list is empty,
 * the indentation for its first element. */
static void jsonListBeginFunc(tr_variant const* val, void* vdata)
{
    struct jsonWalk* const walk = vdata;
    bool const has_children = tr_variantListSize(val) != 0;

    jsonPushParent(walk, val);
    evbuffer_add(walk->out, "[", 1);

    if (has_children)
    {
        jsonIndent(walk);
    }
}
672 
/* Walk callback: closes a dict or list.
 *
 * The container's bookkeeping entry is dropped first so that the closing
 * bracket is indented at the enclosing level. The indentation is emitted for
 * every container, including empty ones. The closing '}' or ']' then counts
 * as one finished child of the enclosing container, if there is one. */
static void jsonContainerEndFunc(tr_variant const* val, void* vdata)
{
    struct jsonWalk* data = vdata;

    jsonPopParent(data);
    jsonIndent(data);

    if (tr_variantIsDict(val))
    {
        evbuffer_add(data->out, "}", 1);
    }
    else /* list */
    {
        evbuffer_add(data->out, "]", 1);
    }

    jsonChildFunc(data);
}
696 
/* Callbacks passed to tr_variantWalk() by tr_variantToBufJson().
 * The initializers are positional, so their order must match the member
 * order of struct VariantWalkFuncs. */
static struct VariantWalkFuncs const walk_funcs =
{
    jsonIntFunc,
    jsonBoolFunc,
    jsonRealFunc,
    jsonStringFunc,
    jsonDictBeginFunc,
    jsonListBeginFunc,
    jsonContainerEndFunc
};
707 
/* Serializes `top' as JSON, appending the text to `buf'.
 *
 * @param top  the variant to serialize
 * @param buf  receives the JSON text
 * @param lean when true, no newlines or indentation are written between
 *             elements
 *
 * A trailing newline is appended whenever `buf' is non-empty afterwards. */
void tr_variantToBufJson(tr_variant const* top, struct evbuffer* buf, bool lean)
{
    struct jsonWalk walk = { .doIndent = !lean, .parents = NULL, .out = buf };

    tr_variantWalk(top, &walk_funcs, &walk, true);

    if (evbuffer_get_length(buf) == 0)
    {
        return;
    }

    evbuffer_add_printf(buf, "\n");
}
723