1 /*
2  *  json_parser.cpp
3  *  PHD Guiding
4  *
5  *  Created by Andy Galasso
6  *  Copyright (c) 2013 Andy Galasso
7  *  All rights reserved.
8  *
9  *  This source code is distributed under the following "BSD" license
10  *  Redistribution and use in source and binary forms, with or without
11  *  modification, are permitted provided that the following conditions are met:
12  *    Redistributions of source code must retain the above copyright notice,
13  *     this list of conditions and the following disclaimer.
14  *    Redistributions in binary form must reproduce the above copyright notice,
15  *     this list of conditions and the following disclaimer in the
16  *     documentation and/or other materials provided with the distribution.
17  *    Neither the name of Craig Stark, Stark Labs nor the names of its
18  *     contributors may be used to endorse or promote products derived from
19  *     this software without specific prior written permission.
20  *
21  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  *  POSSIBILITY OF SUCH DAMAGE.
32  *
33  * This file contains a modified version of vjson, which comes with the following
34  * license:
35  *
36  * Copyright (c) 2010, Ivan Vashchaev
37  *
38  * Permission is hereby granted, free of charge, to any person obtaining a copy
39  *  of this software and associated documentation files (the "Software"), to deal
40  *  in the Software without restriction, including without limitation the rights
41  *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
42  *  copies of the Software, and to permit persons to whom the Software is
43  *  furnished to do so, subject to the following conditions:
44  *
45  *  The above copyright notice and this permission notice shall be included in
46  *  all copies or substantial portions of the Software.
47  *
48  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
49  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
50  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
51  *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
52  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
53  *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
54  *  THE SOFTWARE.
55  */
56 
#include "phd.h"
#include "json_parser.h"

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <limits>
#include <memory.h>
#include <new>
62 
// Arena allocator: hands out memory carved from a singly linked list of
// heap blocks. Individual allocations are never released on their own; all
// memory is reclaimed together by reset(), free() or the destructor.
class block_allocator
{
private:
    struct block
    {
        size_t size;    // total bytes in this block, header included
        size_t used;    // bytes consumed so far, header included
        block *next;    // block allocated before this one, or null
    };

    block *m_head;          // most recently allocated block; allocations come from here
    size_t m_blocksize;     // minimum size of a newly allocated block

    // not copyable: two allocators must never own the same block list
    block_allocator(const block_allocator &) = delete;
    block_allocator &operator=(const block_allocator &) = delete;

    // round n up to a multiple of the strictest fundamental alignment
    static size_t align_up(size_t n);

    // exchange contents with rhs
    void swap(block_allocator &rhs);

public:
    // blocksize: minimum number of bytes requested from the heap per block
    explicit block_allocator(size_t blocksize);
    ~block_allocator();

    // allocate size bytes, suitably aligned for any fundamental type;
    // throws std::bad_alloc if the heap is exhausted
    void *malloc(size_t size);

    // reset to empty state, keeping one allocated block
    void reset();

    // free all allocated blocks
    void free();
};

block_allocator::block_allocator(size_t blocksize): m_head(nullptr), m_blocksize(blocksize)
{
}

block_allocator::~block_allocator()
{
    while (m_head)
    {
        block *temp = m_head->next;
        ::free(m_head);
        m_head = temp;
    }
}

size_t block_allocator::align_up(size_t n)
{
    const size_t alignment = alignof(std::max_align_t);
    return (n + alignment - 1) / alignment * alignment;
}

void block_allocator::reset()
{
    if (m_head)
    {
        // detach and release every block except the head
        block *b = m_head->next;
        m_head->next = nullptr;
        while (b)
        {
            block *t = b->next;
            ::free(b);
            b = t;
        }
        // the head block is reused from just past its header
        m_head->used = sizeof(block);
    }
}

void block_allocator::swap(block_allocator& rhs)
{
    std::swap(m_blocksize, rhs.m_blocksize);
    std::swap(m_head, rhs.m_head);
}

void *block_allocator::malloc(size_t size)
{
    // Align the start of every allocation; ::malloc returns storage aligned
    // for any fundamental type, so an aligned offset gives an aligned pointer.
    size_t offset = m_head ? align_up(m_head->used) : 0;

    // The comparison is written as a subtraction so it cannot overflow.
    if (!m_head || offset > m_head->size || size > m_head->size - offset)
    {
        const size_t header = align_up(sizeof(block));
        if (size > static_cast<size_t>(-1) - header)
            throw std::bad_alloc();     // header + size would overflow

        // calc needed size for allocation
        const size_t alloc_size = std::max(header + size, m_blocksize);

        // create new block and make it the head
        block *b = static_cast<block *>(::malloc(alloc_size));
        if (!b)
            throw std::bad_alloc();
        b->size = alloc_size;
        b->used = sizeof(block);
        b->next = m_head;
        m_head = b;

        offset = header;
    }

    void *ptr = reinterpret_cast<char *>(m_head) + offset;
    m_head->used = offset + size;
    return ptr;
}

void block_allocator::free()
{
    // Hand the block list to a temporary whose destructor releases it. The
    // temporary is created with the current block size so that the swap
    // leaves m_blocksize unchanged for later allocations.
    block_allocator released(m_blocksize);
    released.swap(*this);
}
157 
// True if the character is an ASCII decimal digit ('0'..'9').
// The argument is parenthesized so that expression arguments keep their
// meaning; note that it is still evaluated twice.
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
160 
// Convert the decimal text in [first, last) to an int, strtol-style.
//
// Accepts an optional leading '-' or '+' followed by digits. Stores the
// parsed value in *out and returns a pointer to the first character that was
// not consumed:
//  - if there are no digits (e.g. the range is empty or just "-"), nothing is
//    consumed: *out is 0 and the original `first` is returned;
//  - if the value does not fit in an int, parsing stops on the digit that
//    would overflow, so the returned pointer is short of the end of the
//    number and *out holds the value parsed up to that point.
// Callers detect both failures by comparing the result with `last`.
static char *atoi(char *first, char *last, int *out)
{
    char *const start = first;

    bool negative = false;
    if (first != last && (*first == '-' || *first == '+'))
    {
        negative = (*first == '-');
        ++first;
    }

    // Largest magnitude that still fits: |INT_MIN| is one more than INT_MAX.
    const long long limit = negative
        ? -static_cast<long long>(std::numeric_limits<int>::min())
        : static_cast<long long>(std::numeric_limits<int>::max());

    char *const digits = first;
    long long magnitude = 0;
    for (; first != last && *first >= '0' && *first <= '9'; ++first)
    {
        const long long next = 10 * magnitude + (*first - '0');
        if (next > limit)
        {
            break;      // leave first on the offending digit
        }
        magnitude = next;
    }

    if (first == digits)
    {
        // a sign alone is not a number
        *out = 0;
        return start;
    }

    *out = static_cast<int>(negative ? -magnitude : magnitude);

    return first;
}
187 
188 // convert hexadecimal string to unsigned integer
hatoui(char * first,char * last,unsigned int * out)189 static char *hatoui(char *first, char *last, unsigned int *out)
190 {
191     unsigned int result = 0;
192     for (; first != last; ++first)
193     {
194         int digit;
195         if (IS_DIGIT(*first))
196         {
197             digit = *first - '0';
198         }
199         else if (*first >= 'a' && *first <= 'f')
200         {
201             digit = *first - 'a' + 10;
202         }
203         else if (*first >= 'A' && *first <= 'F')
204         {
205             digit = *first - 'A' + 10;
206         }
207         else
208         {
209             break;
210         }
211         result = 16 * result + digit;
212     }
213     *out = result;
214 
215     return first;
216 }
217 
// Convert the decimal text in [first, last) to a float, strtod-style.
//
// Accepted form: optional '-' or '+', digits, optional '.' and digits,
// optional exponent ('e' or 'E', optional sign, digits). Stores the value in
// *out and returns a pointer to the first character that was not consumed:
//  - if the mantissa has no digits (e.g. "-" or "."), nothing is consumed:
//    *out is 0 and the original `first` is returned;
//  - an 'e'/'E' that is not followed by at least one digit is not consumed.
// Callers detect malformed numbers by comparing the result with `last`.
//
// The value is accumulated in double precision and converted to float once
// at the end. Magnitudes too large for a float give +/-infinity.
static char *atof(char *first, char *last, float *out)
{
    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };

    char *const start = first;

    // sign
    bool negative = false;
    if (first != last && (*first == '-' || *first == '+'))
    {
        negative = (*first == '-');
        ++first;
    }

    bool have_digits = false;

    // integer part
    double result = 0;
    for (; first != last && is_digit(*first); ++first)
    {
        result = 10 * result + (*first - '0');
        have_digits = true;
    }

    // fraction part
    if (first != last && *first == '.')
    {
        ++first;

        double inv_base = 0.1;
        for (; first != last && is_digit(*first); ++first)
        {
            result += (*first - '0') * inv_base;
            inv_base *= 0.1;
            have_digits = true;
        }
    }

    if (!have_digits)
    {
        *out = 0;
        return start;
    }

    // exponent: only consumed when at least one digit follows the marker
    if (first != last && (*first == 'e' || *first == 'E'))
    {
        char *p = first + 1;

        bool exponent_negative = false;
        if (p != last && (*p == '-' || *p == '+'))
        {
            exponent_negative = (*p == '-');
            ++p;
        }

        if (p != last && is_digit(*p))
        {
            // Saturate instead of overflowing: any exponent this large
            // already takes the result to infinity or zero, and the cap
            // bounds the scaling loop below.
            int exponent = 0;
            for (; p != last && is_digit(*p); ++p)
            {
                if (exponent < 1000)
                {
                    exponent = 10 * exponent + (*p - '0');
                }
            }

            double power_of_ten = 1;
            for (; exponent > 0; --exponent)
            {
                power_of_ten *= 10;
            }

            // a zero mantissa stays zero (0 * infinity would be NaN)
            if (result != 0)
            {
                if (exponent_negative)
                {
                    result /= power_of_ten;
                }
                else
                {
                    result *= power_of_ten;
                }
            }

            first = p;
        }
    }

    float value;
    if (result > std::numeric_limits<float>::max())
    {
        value = std::numeric_limits<float>::infinity();
    }
    else
    {
        value = static_cast<float>(result);
    }
    *out = negative ? -value : value;

    return first;
}
304 
json_alloc(block_allocator * allocator)305 static json_value *json_alloc(block_allocator *allocator)
306 {
307     json_value *value = (json_value *)allocator->malloc(sizeof(json_value));
308     memset(value, 0, sizeof(json_value));
309     return value;
310 }
311 
json_append(json_value * lhs,json_value * rhs)312 static void json_append(json_value *lhs, json_value *rhs)
313 {
314     rhs->parent = lhs;
315     if (lhs->last_child)
316     {
317         lhs->last_child = lhs->last_child->next_sibling = rhs;
318     }
319     else
320     {
321         lhs->first_child = lhs->last_child = rhs;
322     }
323 }
324 
// Report a parse error and leave the enclosing function by returning 0.
//
// Expects these names in the enclosing scope: source (start of the buffer),
// escaped_newlines, error_pos, error_desc and error_line. Stores the error
// position and description and computes a 1-based line number by counting
// the '\n' characters in [source, it). escaped_newlines is subtracted from
// that count; it is the caller's running total of "\n" escapes, which do not
// start a new input line. Both arguments are evaluated more than once.
#define JSON_ERROR(it, desc) do { \
    *error_pos = (it); \
    *error_desc = (desc); \
    *error_line = 1 - escaped_newlines; \
    for (const char *c = source; c != (it); ++c) \
        if (*c == '\n') ++*error_line; \
    return 0; \
} while (false)

// Fail with "Unexpected character" at `it` unless a container is open (top).
// Wrapped in do/while so it acts as a single statement inside if/else.
#define CHECK_TOP() do { if (!top) JSON_ERROR(it, "Unexpected character"); } while (false)
335 
json_parse(char * source,const char ** error_pos,const char ** error_desc,int * error_line,block_allocator * allocator)336 static json_value *json_parse(char *source, const char **error_pos,
337                               const char **error_desc, int *error_line,
338                               block_allocator *allocator)
339 {
340     json_value *root = 0;
341     json_value *top = 0;
342 
343     char *name = 0;
344     char *it = source;
345 
346     int escaped_newlines = 0;
347 
348     // skip leading whitespace
349     while (*it == '\x20' || *it == '\x9' || *it == '\xD' || *it == '\xA')
350     {
351         ++it;
352     }
353 
354     while (*it)
355     {
356         switch (*it)
357         {
358         case '{':
359         case '[':
360             {
361                 // create new value
362                 json_value *object = json_alloc(allocator);
363 
364                 // name
365                 object->name = name;
366                 name = 0;
367 
368                 // type
369                 object->type = (*it == '{') ? JSON_OBJECT : JSON_ARRAY;
370 
371                 // skip open character
372                 ++it;
373 
374                 // set top and root
375                 if (top)
376                 {
377                     json_append(top, object);
378                 }
379                 else if (!root)
380                 {
381                     root = object;
382                 }
383                 else
384                 {
385                     JSON_ERROR(it, "Second root. Only one root allowed");
386                 }
387                 top = object;
388             }
389             break;
390 
391         case '}':
392         case ']':
393             {
394                 if (!top || top->type != ((*it == '}') ? JSON_OBJECT : JSON_ARRAY))
395                 {
396                     JSON_ERROR(it, "Mismatch closing brace/bracket");
397                 }
398 
399                 // skip close character
400                 ++it;
401 
402                 // set top
403                 top = top->parent;
404             }
405             break;
406 
407         case ':':
408             if (!top || top->type != JSON_OBJECT || !name)
409             {
410                 JSON_ERROR(it, "Unexpected character");
411             }
412             ++it;
413             break;
414 
415         case ',':
416             CHECK_TOP();
417             ++it;
418             break;
419 
420         case '"':
421             {
422                 CHECK_TOP();
423 
424                 // skip '"' character
425                 ++it;
426 
427                 char *first = it;
428                 char *last = it;
429                 while (*it)
430                 {
431                     if ((unsigned char)*it < '\x20')
432                     {
433                         JSON_ERROR(first, "Control characters not allowed in strings");
434                     }
435                     else if (*it == '\\')
436                     {
437                         switch (it[1])
438                         {
439                         case '"':
440                             *last = '"';
441                             break;
442                         case '\\':
443                             *last = '\\';
444                             break;
445                         case '/':
446                             *last = '/';
447                             break;
448                         case 'b':
449                             *last = '\b';
450                             break;
451                         case 'f':
452                             *last = '\f';
453                             break;
454                         case 'n':
455                             *last = '\n';
456                             ++escaped_newlines;
457                             break;
458                         case 'r':
459                             *last = '\r';
460                             break;
461                         case 't':
462                             *last = '\t';
463                             break;
464                         case 'u':
465                             {
466                                 unsigned int codepoint;
467                                 if (hatoui(it + 2, it + 6, &codepoint) != it + 6)
468                                 {
469                                     JSON_ERROR(it, "Bad unicode codepoint");
470                                 }
471 
472                                 if (codepoint <= 0x7F)
473                                 {
474                                     *last = (char)codepoint;
475                                 }
476                                 else if (codepoint <= 0x7FF)
477                                 {
478                                     *last++ = (char)(0xC0 | (codepoint >> 6));
479                                     *last = (char)(0x80 | (codepoint & 0x3F));
480                                 }
481                                 else if (codepoint <= 0xFFFF)
482                                 {
483                                     *last++ = (char)(0xE0 | (codepoint >> 12));
484                                     *last++ = (char)(0x80 | ((codepoint >> 6) & 0x3F));
485                                     *last = (char)(0x80 | (codepoint & 0x3F));
486                                 }
487                             }
488                             it += 4;
489                             break;
490                         default:
491                             JSON_ERROR(first, "Unrecognized escape sequence");
492                         }
493 
494                         ++last;
495                         it += 2;
496                     }
497                     else if (*it == '"')
498                     {
499                         *last = 0;
500                         ++it;
501                         break;
502                     }
503                     else
504                     {
505                         *last++ = *it++;
506                     }
507                 }
508 
509                 if (!name && top->type == JSON_OBJECT)
510                 {
511                     // field name in object
512                     name = first;
513                 }
514                 else
515                 {
516                     // new string value
517                     json_value *object = json_alloc(allocator);
518 
519                     object->name = name;
520                     name = 0;
521 
522                     object->type = JSON_STRING;
523                     object->string_value = first;
524 
525                     json_append(top, object);
526                 }
527             }
528             break;
529 
530         case 'n':
531         case 't':
532         case 'f':
533             {
534                 CHECK_TOP();
535 
536                 // new null/bool value
537                 json_value *object = json_alloc(allocator);
538 
539                 if (top->type == JSON_OBJECT && !name)
540                 {
541                     JSON_ERROR(it, "Missing name");
542                 }
543 
544                 object->name = name;
545                 name = 0;
546 
547                 // null
548                 if (it[0] == 'n' && it[1] == 'u' && it[2] == 'l' && it[3] == 'l')
549                 {
550                     object->type = JSON_NULL;
551                     it += 4;
552                 }
553                 // true
554                 else if (it[0] == 't' && it[1] == 'r' && it[2] == 'u' && it[3] == 'e')
555                 {
556                     object->type = JSON_BOOL;
557                     object->int_value = 1;
558                     it += 4;
559                 }
560                 // false
561                 else if (it[0] == 'f' && it[1] == 'a' && it[2] == 'l' && it[3] == 's' && it[4] == 'e')
562                 {
563                     object->type = JSON_BOOL;
564                     object->int_value = 0;
565                     it += 5;
566                 }
567                 else
568                 {
569                     JSON_ERROR(it, "Unknown identifier");
570                 }
571 
572                 json_append(top, object);
573             }
574             break;
575 
576         case '-':
577         case '0':
578         case '1':
579         case '2':
580         case '3':
581         case '4':
582         case '5':
583         case '6':
584         case '7':
585         case '8':
586         case '9':
587             {
588                 CHECK_TOP();
589 
590                 // new number value
591                 json_value *object = json_alloc(allocator);
592 
593                 if (top->type == JSON_OBJECT && !name)
594                 {
595                     JSON_ERROR(it, "Missing name");
596                 }
597 
598                 object->name = name;
599                 name = 0;
600 
601                 object->type = JSON_INT;
602 
603                 char *first = it;
604                 while (*it != '\x20' && *it != '\x9' && *it != '\xD' && *it != '\xA' && *it != ',' && *it != ']' && *it != '}')
605                 {
606                     if (*it == '.' || *it == 'e' || *it == 'E')
607                     {
608                         object->type = JSON_FLOAT;
609                     }
610                     ++it;
611                 }
612 
613                 if (object->type == JSON_INT && atoi(first, it, &object->int_value) != it)
614                 {
615                     JSON_ERROR(first, "Bad integer number");
616                 }
617 
618                 if (object->type == JSON_FLOAT && atof(first, it, &object->float_value) != it)
619                 {
620                     JSON_ERROR(first, "Bad float number");
621                 }
622 
623                 json_append(top, object);
624             }
625             break;
626 
627         default:
628             JSON_ERROR(it, "Unexpected character");
629         }
630 
631         // skip white space
632         while (*it == '\x20' || *it == '\x9' || *it == '\xD' || *it == '\xA')
633         {
634             ++it;
635         }
636     }
637 
638     if (top)
639     {
640         JSON_ERROR(it, "Not all objects/arrays have been properly closed");
641     }
642 
643     if (!root)
644     {
645         JSON_ERROR(it, "empty string");
646     }
647 
648     return root;
649 }
650 
651 // ===== public interface ======
652 
653 struct JsonParserImpl
654 {
655     block_allocator alloc;
656     void *tmpbuf;
657 
658     json_value *root;
659 
660     const char *error_pos;
661     const char *error_desc;
662     int error_line;
663 
JsonParserImplJsonParserImpl664     JsonParserImpl() : alloc(4096), tmpbuf(nullptr) { }
~JsonParserImplJsonParserImpl665     ~JsonParserImpl() { if (tmpbuf) ::free(tmpbuf); }
666 };
667 
JsonParser()668 JsonParser::JsonParser()
669     : m_impl(new JsonParserImpl())
670 {
671 }
672 
~JsonParser()673 JsonParser::~JsonParser()
674 {
675     delete m_impl;
676 }
677 
Parse(char * str)678 bool JsonParser::Parse(char *str)
679 {
680     m_impl->alloc.reset();
681     m_impl->root = json_parse(str, &m_impl->error_pos, &m_impl->error_desc, &m_impl->error_line, &m_impl->alloc);
682     return m_impl->root != 0;
683 }
684 
Parse(const std::string & str)685 bool JsonParser::Parse(const std::string& str)
686 {
687     if (m_impl->tmpbuf)
688         ::free(m_impl->tmpbuf);
689     m_impl->tmpbuf = ::malloc(str.length() + 1);
690     memcpy(m_impl->tmpbuf, str.c_str(), str.length() + 1);
691     return Parse(static_cast<char *>(m_impl->tmpbuf));
692 }
693 
ErrorPos() const694 const char *JsonParser::ErrorPos() const
695 {
696     return m_impl->error_pos;
697 }
698 
ErrorDesc() const699 const char *JsonParser::ErrorDesc() const
700 {
701     return m_impl->error_desc;
702 }
703 
ErrorLine() const704 int JsonParser::ErrorLine() const
705 {
706     return m_impl->error_line;
707 }
708 
Root()709 const json_value *JsonParser::Root()
710 {
711     return m_impl->root;
712 }
713