1 /*==============================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 
27 #include "json-priv.hpp"
28 #include <ncbi/secure/payload.hpp>
29 
30 #include <assert.h>
31 #include <ctype.h>
32 //#include <codecvt>
33 #include <locale>
34 #include <errno.h>
35 
36 namespace ncbi
37 {
38     static bool have_limits;
39     JSON :: Limits JSON :: default_limits;
40 
41     struct JSONError
42     {
JSONErrorncbi::JSONError43         JSONError ( const char * _what, const String :: Iterator & _curs )
44             : what ( _what )
45             , curs ( _curs )
46         {
47         }
48 
49         const char * what;
50         const String :: Iterator & curs;
51     };
52 
53     static
operator <<(XP & xp,const JSONError & x)54     XP & operator << ( XP & xp, const JSONError & x )
55     {
56         const String :: Iterator & curs = x . curs;
57         xp
58             << curs . charIndex ()
59             << ", byte offset "
60             << curs . byteOffset ()
61             << ": "
62             << x . what
63             ;
64         return xp;
65     }
66 
67     struct JSONExpected
68     {
JSONExpectedncbi::JSONExpected69         JSONExpected ( const char * _what, const String :: Iterator & _curs )
70             : what ( _what )
71             , curs ( _curs )
72         {
73         }
74 
75         const char * what;
76         const String :: Iterator & curs;
77     };
78 
79     static
operator <<(XP & xp,const JSONExpected & x)80     XP & operator << ( XP & xp, const JSONExpected & x )
81     {
82         const String :: Iterator & curs = x . curs;
83         xp
84             << curs . charIndex ()
85             << ", byte offset "
86             << curs . byteOffset ()
87             << " expected: "
88             << x . what
89             << " but found '"
90             << curs
91             << '\''
92             ;
93         if ( curs . isValid () )
94         {
95             xp
96                 << " ("
97                 << ( U32 ) * curs
98                 << ')'
99                 ;
100         }
101         return xp;
102     }
103 
104     struct JSONTrailingBytes
105     {
JSONTrailingBytesncbi::JSONTrailingBytes106         JSONTrailingBytes ( const String :: Iterator & _curs )
107             : curs ( _curs )
108         {
109         }
110 
111         const String :: Iterator & curs;
112     };
113 
114     static
operator <<(XP & xp,const JSONTrailingBytes & x)115     XP & operator << ( XP & xp, const JSONTrailingBytes & x )
116     {
117         const String :: Iterator & curs = x . curs;
118         xp
119             << curs . charIndex ()
120             << ", byte offset "
121             << curs . byteOffset ()
122             ;
123         return xp;
124     }
125 
// The helper structs above are streamed with file-local "operator <<"
// overloads. Unless the expression starts with a string literal (or
// some other operand for which the XP class itself defines an
// operator), C++ has difficulty associating those overloads.
// Each macro therefore forces a leading string literal in front of
// the helper object.
#define JSON_ERROR( msg, pos ) "JSON: at character index " << JSONError ( msg, pos )
#define JSON_EXPECTED( msg, pos ) "JSON: at character index " << JSONExpected ( msg, pos )
#define JSON_TRAILING_BYTES( pos ) "JSON: trailing bytes in text at character index " << JSONTrailingBytes ( pos )

// produces "<what> ( <actual> ) exceeds allowed limit ( <lim> )"
#define JSON_LIMIT_VIOLATION( what, actual, lim ) what << " ( " << actual << " ) exceeds allowed limit ( " << lim << " )"
139 
double_to_string(long double val,unsigned int precision)140     String double_to_string ( long double val, unsigned int precision )
141     {
142         // TBD - come up with a more precise cutoff
143         if ( precision > 40 )
144             precision = 40;
145 
146         char buffer [ 1024 ];
147         int len = std :: snprintf ( buffer, sizeof buffer, "%.*Lg", precision, val );
148         if ( len < 0 )
149         {
150             int status = errno;
151             throw JSONInternalError (
152                 XP ( XLOC )
153                 << "failed to convert long double to string - "
154                 << syserr ( status )
155                 );
156         }
157         else if ( ( size_t ) len >= sizeof buffer )
158         {
159             throw JSONInternalError (
160                 XP ( XLOC )
161                 << "failed to convert long double to string - "
162                 << "1024-byte buffer was insufficient"
163                 );
164         }
165 
166         return String ( buffer, len );
167     }
168 
169     // skip whitespace
170     // adjust the cursor position until it reaches end or something other than whitespace
171     // returns true if text found ( not end of text )
172     static
skip_whitespace(String::Iterator & curs)173     bool skip_whitespace ( String :: Iterator & curs )
174     {
175         for ( ; curs . isValid (); ++ curs )
176         {
177             if ( ! iswspace ( * curs ) )
178                 return true;
179         }
180 
181         return false;
182     }
183 
184     static
htoi(UTF32 h)185     unsigned int htoi ( UTF32 h )
186     {
187         assert ( iswxdigit ( h ) );
188         if ( iswdigit ( h ) )
189             return h - '0';
190         return towupper ( h ) - 'A' + 10;
191     }
192 
193     static
htoi(UTF32 h1,UTF32 h2)194     unsigned int htoi ( UTF32 h1, UTF32 h2 )
195     {
196         return ( htoi ( h1 ) << 4 ) | htoi ( h2 );
197     }
198 
199     static
htoi(UTF32 h1,UTF32 h2,UTF32 h3,UTF32 h4)200     unsigned int htoi ( UTF32 h1, UTF32 h2, UTF32 h3, UTF32 h4 )
201     {
202         return ( htoi ( h1, h2 ) << 8 ) | htoi ( h3, h4 );
203     }
204 
205     static
hex_to_utf8(const JSON::Limits & lim,String::Iterator & curs,const String::Iterator & start)206     String hex_to_utf8 ( const JSON :: Limits & lim, String :: Iterator & curs,
207         const String :: Iterator & start )
208     {
209         assert ( start . isValid () );
210         assert ( curs . isValid () );
211 
212         Payload pay ( 256 );
213         UTF16 * buffer = ( UTF16 * ) pay . data ();
214         count_t bend = pay . capacity () / sizeof buffer [ 0 ];
215         count_t bidx = 0;
216 
217         unsigned int start_offset = ( unsigned int ) start . byteOffset ();
218         unsigned int start_index = ( unsigned int ) start . charIndex ();
219 
220         while ( curs . isValid () )
221         {
222             size_t str_size = curs . byteOffset () - start_offset;
223             if ( ( unsigned int ) str_size >= lim . string_size )
224             {
225                 throw JSONLimitViolation (
226                     XP ( XLOC )
227                     << JSON_LIMIT_VIOLATION ( "string size", str_size, lim . string_size )
228                     );
229             }
230 
231             count_t str_length = curs . charIndex () - start_index;
232             if ( ( unsigned int ) str_length >= lim . string_length )
233             {
234                 throw JSONLimitViolation (
235                     XP ( XLOC )
236                     << JSON_LIMIT_VIOLATION ( "string length", str_length, lim . string_length )
237                     );
238             }
239 
240             auto save = curs;
241             try
242             {
243                 // look for escape
244                 if ( * curs != '\\' )
245                     break;
246 
247                 // look for 'u'
248                 if ( * ++ curs != 'u' )
249                 {
250                     -- curs;
251                     break;
252                 }
253 
254                 // no turning back now.
255                 // expect 4 hex digits
256                 UTF32 h1 = * ++ curs;
257                 if ( ! iswxdigit ( h1 ) )
258                 {
259                     throw MalformedJSON (
260                         XP ( XLOC )
261                         << JSON_EXPECTED ( "hex digit", curs )
262                         );
263                 }
264 
265                 UTF32 h2 = * ++ curs;
266                 if ( ! iswxdigit ( h2 ) )
267                 {
268                     throw MalformedJSON (
269                         XP ( XLOC )
270                         << JSON_EXPECTED ( "hex digit", curs )
271                         );
272                 }
273 
274                 UTF32 h3 = * ++ curs;
275                 if ( ! iswxdigit ( h3 ) )
276                 {
277                     throw MalformedJSON (
278                         XP ( XLOC )
279                         << JSON_EXPECTED ( "hex digit", curs )
280                         );
281                 }
282 
283                 UTF32 h4 = * ++ curs;
284                 if ( ! iswxdigit ( h4 ) )
285                 {
286                     throw MalformedJSON (
287                         XP ( XLOC )
288                         << JSON_EXPECTED ( "hex digit", curs )
289                         );
290                 }
291 
292                 ++ curs;
293 
294                 // check buffer capacity
295                 if ( bidx == bend )
296                 {
297                     pay . increaseCapacity ();
298                     buffer = ( UTF16 * ) pay . data ();
299                     bend = pay . capacity () / sizeof buffer [ 0 ];
300                 }
301 
302                 // convert to a UTF-16 character
303                 buffer [ bidx ++ ] = htoi ( h1, h2, h3, h4 );
304 
305                 // record size update
306                 pay . setSize ( bidx * sizeof buffer [ 0 ] );
307 
308             }
309             catch ( BoundsException & x )
310             {
311                 curs = save;
312                 throw MalformedJSON (
313                     XP ( XLOC )
314                     << JSON_ERROR ( "bad escape sequence", curs )
315                     );
316             }
317             catch ( ... )
318             {
319                 curs = save;
320                 throw;
321             }
322         }
323 
324         // convert UTF-16 to UTF-8
325         return String ( buffer, bidx );
326     }
327 
328     static
test_depth(const JSON::Limits & lim,unsigned int & depth)329     void test_depth ( const JSON :: Limits & lim, unsigned int & depth )
330     {
331         if ( ++ depth > lim . recursion_depth )
332         {
333             throw JSONLimitViolation (
334                 XP ( XLOC )
335                 << JSON_LIMIT_VIOLATION ( "parsing recursion depth", depth, lim . recursion_depth )
336                 );
337         }
338     }
339 
parse(const String & json)340     JSONValueRef JSON :: parse ( const String & json )
341     {
342         // parse with default limits
343         initLimits ();
344         return parse ( default_limits, json );
345     }
346 
parse(const Limits & lim,const String & json)347     JSONValueRef JSON :: parse ( const Limits & lim, const String & json )
348     {
349         // refuse empty JSON
350         if ( json . isEmpty () )
351             throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" );
352 
353         // check up front if it is huge
354         if ( json . size () > ( size_t ) lim . json_string_size )
355         {
356             throw JSONLimitViolation (
357                 XP ( XLOC )
358                 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), lim . json_string_size )
359                 );
360         }
361 
362         // walk across string with iterator
363         auto curs = json . makeIterator ();
364 
365         // all whitespace is as good as empty
366         if ( ! skip_whitespace ( curs ) )
367         {
368             throw MalformedJSON (
369                 XP ( XLOC )
370                 << JSON_EXPECTED ( "'{' or '['", curs )
371                 );
372         }
373 
374         // return value
375         JSONValueRef val;
376         switch ( * curs )
377         {
378         case '{':
379             val = parseObject ( lim, json, curs, 0 ) . release ();
380             break;
381         case '[':
382             val = parseArray ( lim, json, curs, 0 ) . release ();
383             break;
384         case '"':
385             val = parseString ( lim, json, curs );
386             break;
387         case 'f':
388         case 't':
389             val = parseBoolean ( curs );
390             break;
391         case '-':
392             val = parseNumber ( lim, json, curs );
393             break;
394         case 'n':
395             val = parseNull ( curs );
396             break;
397         default:
398             if ( iswdigit ( * curs ) )
399             {
400                 val = parseNumber ( lim, json, curs );
401                 break;
402             }
403 
404             throw MalformedJSON (
405                 XP ( XLOC )
406                 << JSON_EXPECTED ( "'{' or '['", curs )
407                 );
408         }
409 
410         // reject if any trailing characters are left
411         if ( skip_whitespace ( curs ) )
412         {
413             throw MalformedJSON (
414                 XP ( XLOC )
415                 << JSON_TRAILING_BYTES ( curs )
416                 );
417         }
418 
419         return val;
420     }
421 
parseArray(const String & json)422     JSONArrayRef JSON :: parseArray ( const String & json )
423     {
424         // parse with default limits
425         initLimits ();
426         return parseArray ( default_limits, json );
427     }
428 
parseArray(const Limits & lim,const String & json)429     JSONArrayRef JSON :: parseArray ( const Limits & lim, const String & json )
430     {
431         // refuse empty JSON
432         if ( json . isEmpty () )
433             throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" );
434 
435         // check up front if it is huge
436         if ( json . size () > ( size_t ) lim . json_string_size )
437         {
438             throw JSONLimitViolation (
439                 XP ( XLOC )
440                 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), lim . json_string_size )
441                 );
442         }
443 
444         // walk across string with iterator
445         auto curs = json . makeIterator ();
446 
447         // all whitespace is as good as empty
448         if ( ! skip_whitespace ( curs ) )
449         {
450             throw MalformedJSON (
451                 XP ( XLOC )
452                 << JSON_EXPECTED ( "'['", curs )
453                 );
454         }
455 
456         // return array
457         JSONArrayRef array;
458         switch ( * curs )
459         {
460         case '[':
461             array = parseArray ( lim, json, curs, 0 );
462             break;
463         default:
464             throw MalformedJSON (
465                 XP ( XLOC )
466                 << JSON_EXPECTED ( "'['", curs )
467                 );
468         }
469 
470         // reject if any trailing characters are left
471         if ( skip_whitespace ( curs ) )
472         {
473             throw MalformedJSON (
474                 XP ( XLOC )
475                 << JSON_TRAILING_BYTES ( curs )
476                 );
477         }
478 
479         return array;
480     }
481 
parseObject(const String & json)482     JSONObjectRef JSON :: parseObject ( const String & json )
483     {
484         // parse with default limits
485         initLimits ();
486         return parseObject ( default_limits, json );
487     }
488 
parseObject(const Limits & lim,const String & json)489     JSONObjectRef JSON :: parseObject ( const Limits & lim, const String & json )
490     {
491         // refuse empty JSON
492         if ( json . isEmpty () )
493             throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" );
494 
495         // check up front if it is huge
496         if ( json . size () > ( size_t ) lim . json_string_size )
497         {
498             throw JSONLimitViolation (
499                 XP ( XLOC )
500                 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), lim . json_string_size )
501                 );
502         }
503 
504         // walk across string with iterator
505         auto curs = json . makeIterator ();
506 
507         // all whitespace is as good as empty
508         if ( ! skip_whitespace ( curs ) )
509         {
510             throw MalformedJSON (
511                 XP ( XLOC )
512                 << JSON_EXPECTED ( "'{'", curs )
513                 );
514         }
515 
516         // return object
517         JSONObjectRef obj;
518         switch ( * curs )
519         {
520         case '{':
521             obj = parseObject ( lim, json, curs, 0 );
522             break;
523         default:
524             throw MalformedJSON (
525                 XP ( XLOC )
526                 << JSON_EXPECTED ( "'{'", curs )
527                 );
528         }
529 
530         // reject if any trailing characters are left
531         if ( skip_whitespace ( curs ) )
532         {
533             throw MalformedJSON (
534                 XP ( XLOC )
535                 << JSON_TRAILING_BYTES ( curs )
536                 );
537         }
538 
539         return obj;
540     }
541 
makeNull()542     JSONValueRef JSON :: makeNull ()
543     {
544         // create a wrapper with no value
545         return JSONValueRef ( new JSONWrapper ( jvt_null ) );
546     }
547 
makeBoolean(bool val)548     JSONValueRef JSON :: makeBoolean ( bool val )
549     {
550         // create a wrapper with a Boolean value
551         return JSONValueRef ( new JSONWrapper ( jvt_bool, new JSONBoolean ( val ) ) );
552     }
553 
makeNumber(const String & val)554     JSONValueRef JSON :: makeNumber ( const String & val )
555     {
556         // parse a number from a string using default limits
557         initLimits ();
558         auto curs = val . makeIterator ();
559         return parseNumber ( default_limits, val, curs );
560     }
561 
makeInteger(long long int val)562     JSONValueRef JSON :: makeInteger ( long long int val )
563     {
564         // make an integer from binary value
565         return JSONValueRef ( new JSONWrapper ( jvt_int, new JSONInteger ( val ) ) );
566     }
567 
makeDouble(long double val,unsigned int precision)568     JSONValueRef JSON :: makeDouble ( long double val, unsigned int precision )
569     {
570         // make a textual number from binary value and precision
571         return makeParsedNumber ( double_to_string ( val, precision ) );
572     }
573 
makeString(const String & str)574     JSONValueRef JSON :: makeString ( const String & str )
575     {
576         initLimits ();
577 
578         // check string size limit
579         if ( str . size () > default_limits . string_size )
580         {
581             throw JSONLimitViolation (
582                 XP ( XLOC )
583                 << JSON_LIMIT_VIOLATION ( "string size", str . size (), default_limits . string_size )
584                 );
585         }
586 
587         // check string length limit
588         if ( str . length () > default_limits . string_length )
589         {
590             throw JSONLimitViolation (
591                 XP ( XLOC )
592                 << JSON_LIMIT_VIOLATION ( "string length", str . length (), default_limits . string_length )
593                 );
594         }
595 
596         // make a string from outside
597         return makeParsedString ( str );
598     }
599 
makeArray()600     JSONArrayRef JSON :: makeArray ()
601     {
602         // make a new empty array
603         return JSONArrayRef ( new JSONArray () );
604     }
605 
makeObject()606     JSONObjectRef JSON :: makeObject ()
607     {
608         // make a new empty object
609         return JSONObjectRef ( new JSONObject () );
610     }
611 
initLimits()612     void JSON :: initLimits ()
613     {
614         if ( ! have_limits )
615             default_limits = Limits ();
616     }
617 
parse(const Limits & lim,const String & json,String::Iterator & curs,unsigned int depth)618     JSONValueRef JSON :: parse ( const Limits & lim, const String & json,
619         String :: Iterator & curs, unsigned int depth )
620     {
621         // parse potentially empty text
622         if ( skip_whitespace ( curs ) )
623         {
624             switch ( * curs )
625             {
626                 case '{':
627                     return parseObject ( lim, json, curs, depth ) . release ();
628                 case '[':
629                     return parseArray ( lim, json, curs, depth ) . release ();
630                 case '"':
631                     return parseString ( lim, json, curs );
632                 case 'f':
633                 case 't':
634                     return parseBoolean ( curs );
635                 case '-':
636                     return parseNumber ( lim, json, curs );
637                 case 'n':
638                     return parseNull ( curs );
639                 default:
640                     if ( iswdigit ( * curs ) )
641                         return parseNumber ( lim, json, curs );
642 
643                     // garbage
644                     throw MalformedJSON (
645                         XP ( XLOC )
646                         << JSON_EXPECTED ( "'{' or '[' or '\"' or 'true' or 'false' or number", curs )
647                         );
648             }
649         }
650 
651         // this is where we need to check returns for nullptr
652         return JSONValueRef ( nullptr );
653     }
654 
parseNull(String::Iterator & curs)655     JSONValueRef JSON :: parseNull ( String :: Iterator & curs )
656     {
657         do
658         {
659             try
660             {
661                 // spell 'n' 'u' 'l' 'l' one character at a time
662                 assert ( * curs == 'n' );
663                 if ( * ++ curs != 'u' )
664                     break;
665                 if ( * ++ curs != 'l' )
666                     break;
667                 if ( * ++ curs != 'l' )
668                     break;
669                 ++ curs;
670 
671                 // ensure that it doesn't appear to continue
672                 if ( curs . isValid () && iswalnum ( * curs ) )
673                     break;
674 
675                 // make a null element
676                 return makeNull ();
677             }
678             catch ( ... )
679             {
680             }
681 
682         }
683         while ( false );
684 
685         // bad JSON
686         throw MalformedJSON (
687             XP ( XLOC )
688             << JSON_EXPECTED ( "keyword 'null'", curs )
689             ) ;
690     }
691 
parseBoolean(String::Iterator & curs)692     JSONValueRef JSON :: parseBoolean ( String :: Iterator & curs )
693     {
694         bool which = false;
695         do
696         {
697             try
698             {
699                 if ( * curs == 't' )
700                 {
701                     which = true;
702                     if ( * ++ curs != 'r' )
703                         break;
704                     if ( * ++ curs != 'u' )
705                         break;
706                     if ( * ++ curs != 'e' )
707                         break;
708                 }
709                 else
710                 {
711                     which = false;
712                     assert ( * curs == 'f' );
713                     if ( * ++ curs != 'a' )
714                         break;
715                     if ( * ++ curs != 'l' )
716                         break;
717                     if ( * ++ curs != 's' )
718                         break;
719                     if ( * ++ curs != 'e' )
720                         break;
721                 }
722 
723                 ++ curs;
724 
725                 // ensure that it doesn't appear to continue
726                 if ( curs . isValid () && iswalnum ( * curs ) )
727                     break;
728 
729                 return makeBoolean ( which );
730             }
731             catch ( ... )
732             {
733             }
734 
735         }
736         while ( false );
737 
738         // bad JSON
739         const char * what = which ?
740             "keyword 'true'" : "keyword 'false'";
741         throw MalformedJSON (
742             XP ( XLOC )
743             << JSON_EXPECTED ( what, curs )
744             ) ;
745     }
746 
parseNumber(const Limits & lim,const String & json,String::Iterator & curs)747     JSONValueRef JSON :: parseNumber ( const Limits & lim,
748         const String & json, String :: Iterator & curs )
749     {
750         assert ( iswdigit ( * curs ) || * curs == '-' );
751 
752         // record starting position within string
753         count_t start = curs . charIndex ();
754 
755         // skip over negation
756         if ( * curs == '-' )
757             ++ curs;
758 
759         if ( ! curs . isValid () || ! iswdigit ( * curs ) )
760         {
761             throw MalformedJSON (
762                 XP ( XLOC )
763                 << JSON_EXPECTED ( "digit", curs )
764                 );
765         }
766 
767         // check for 0
768         if ( * curs == '0' )
769             ++ curs;
770         else
771         {
772             // we know from the tests above that val is 1..9
773             assert ( * curs >= '1' && * curs <= '9' );
774             ++ curs;
775 
776             // just find the end of the number
777             while ( curs . isValid () && iswdigit ( * curs ) )
778                 ++ curs;
779         }
780 
781         // declare a cursor to peek ahead
782         auto peek = curs;
783 
784         bool is_float = false;
785         if ( peek . isValid () )
786         {
787             switch ( * peek )
788             {
789             case '.':
790             {
791                 // skip digits in search of float indicator
792                 ++ peek;
793                 while ( peek . isValid () && iswdigit ( * peek ) )
794                 {
795                     ++ peek;
796                     is_float = true;
797                 }
798 
799                 // must have at least one digit
800                 if ( ! is_float )
801                     break; // we have an integer
802 
803                 // if a character other than was [eE] found, break
804                 if ( peek . isValid () && towupper ( * peek ) != 'E' )
805                     break;
806 
807                 // no break - we have an [eE], fall through
808             }
809             case 'E':
810             case 'e':
811             {
812                 ++ peek;
813                 if ( peek . isValid () )
814                 {
815                     switch ( * peek )
816                     {
817                     case '+':
818                     case '-':
819                         ++ peek;
820                         break;
821                     }
822                 }
823 
824                 while ( peek . isValid () && iswdigit ( * peek ) )
825                 {
826                     ++ peek;
827                     is_float = true;
828                 }
829 
830                 break;
831             }}
832         }
833 
834         // update curs if we found floating point
835         if ( is_float )
836             curs = peek;
837 
838         // check the number of total characters
839         count_t num_length = curs . charIndex () - start;
840         if ( num_length > lim . numeral_length )
841         {
842             throw JSONLimitViolation (
843                 XP ( XLOC )
844                 << JSON_LIMIT_VIOLATION ( "numeral length", num_length, lim . numeral_length )
845                 );
846         }
847 
848         // this is the numeric string
849         String num_str = json . subString ( start, curs . charIndex () - start );
850         if ( ! is_float )
851         {
852             try
853             {
854                 // try to convert it to a binary integer
855                 long long int num = decToLongLongInteger ( num_str );
856                 return makeInteger ( num );
857             }
858             catch ( OverflowException & x )
859             {
860                 // too big - fall out
861             }
862         }
863 
864         // keep it as a string
865         return makeParsedNumber ( num_str );
866     }
867 
parseString(const Limits & lim,const String & json,String::Iterator & curs)868     JSONValueRef JSON :: parseString ( const Limits & lim,
869         const String & json, String :: Iterator & curs )
870     {
871         assert ( * curs == '"' );
872 
873         // accumulate text here
874         StringBuffer sb;
875 
876         // mark the start of the string
877         String :: Iterator start ( ++ curs );
878 
879         // a look-ahead
880         String :: Iterator delim = curs;
881 
882         // Find ending '"' or escaped characters
883         if ( ! delim . findFirstOf ( "\\\"" ) )
884         {
885             throw MalformedJSON (
886                 XP ( XLOC )
887                 << JSON_ERROR ( "unterminated string", delim )
888                 );
889         }
890 
891         while ( 1 )
892         {
893             // add everything before the delimiter to the new string
894             size_t proj_size = sb . size () + ( delim . byteOffset () - curs . byteOffset () );
895             if ( proj_size > lim . string_size )
896             {
897                 throw JSONLimitViolation (
898                     XP ( XLOC )
899                     << JSON_LIMIT_VIOLATION ( "string size", proj_size, lim . string_size )
900                     );
901             }
902 
903             count_t proj_len = sb . length () + ( delim - curs );
904             if ( proj_len > lim . string_length )
905             {
906                 throw JSONLimitViolation (
907                     XP ( XLOC )
908                     << JSON_LIMIT_VIOLATION ( "string length", proj_len, lim . string_length )
909                     );
910             }
911 
912             sb += json . subString ( curs . charIndex (), delim - curs );
913             curs = delim;
914 
915             // found end of string
916             if ( * curs != '\\' )
917                 break;
918 
919             // found '\'
920             bool advance = true;
921             switch ( * ++ curs )
922             {
923                 case '"':
924                     sb += '"';
925                     break;
926                 case '\\':
927                     sb += '\\';
928                     break;
929                 case '/':
930                     sb += '/';
931                     break;
932                 case 'b':
933                     sb += '\b';
934                     break;
935                 case 'f':
936                     sb += '\f';
937                     break;
938                 case 'n':
939                     sb += '\n';
940                     break;
941                 case 'r':
942                     sb += '\r';
943                     break;
944                 case 't':
945                     sb += '\t';
946                     break;
947                 case 'u':
948                 {
949                     // back up to escape for regular pattern
950                     -- curs;
951                     String utf8 = hex_to_utf8 ( lim, curs, start );
952                     sb += utf8;
953 
954                     // prepare for skip ahead below
955                     advance = false;
956                     break;
957                 }
958 
959                 default:
960                     -- curs;
961                     throw MalformedJSON (
962                         XP ( XLOC )
963                         << JSON_ERROR ( "Invalid escape character", curs )
964                         << ' '
965                         << curs
966                         << "' ("
967                         << ( U32 ) * curs
968                         << ')'
969                         );
970             }
971 
972             // skip escaped character
973             if ( advance )
974                 ++ curs;
975 
976             // Find ending '"' or control characters
977             if ( ! ( delim = curs ) . findFirstOf ( "\\\"" ) )
978             {
979                 throw MalformedJSON (
980                     XP ( XLOC )
981                     << JSON_ERROR ( "unterminated string", delim )
982                     );
983             }
984         }
985 
986         // being here should mean that we had a break above
987         // on the line looking for a backslash
988         assert ( delim == curs );
989 
990         // because "esc" is identical to "curs"
991         // and because "esc" found either a backslash or quote,
992         // and because backslash kept us in the loop, we know
993         // that the current character must be a closing quote.
994         assert ( * curs == '"' );
995 
996         // set pos to point to next token
997         ++ curs;
998 
999         if ( sb . size () > lim . string_size )
1000         {
1001             throw JSONLimitViolation (
1002                 XP ( XLOC )
1003                 << JSON_LIMIT_VIOLATION ( "string size", sb . size (), lim . string_size )
1004                 );
1005         }
1006         if ( sb . length () > lim . string_length )
1007         {
1008             throw JSONLimitViolation (
1009                 XP ( XLOC )
1010                 << JSON_LIMIT_VIOLATION ( "string length", sb . length (), lim . string_length )
1011                 );
1012         }
1013 
1014         return makeParsedString ( sb . stealString () );
1015     }
1016 
parseArray(const Limits & lim,const String & json,String::Iterator & curs,unsigned int depth)1017     JSONArrayRef JSON :: parseArray ( const Limits & lim, const String & json,
1018         String :: Iterator & curs, unsigned int depth )
1019     {
1020         assert ( * curs == '[' );
1021 
1022         JSONArrayRef array ( new JSONArray () );
1023         while ( 1 )
1024         {
1025             // skip over '[' and any whitespace
1026             // * curs is known to be  '[' or ','
1027             if ( ! skip_whitespace ( ++ curs ) )
1028             {
1029                 throw MalformedJSON (
1030                     XP ( XLOC )
1031                     << JSON_EXPECTED ( "']'", curs )
1032                     );
1033             }
1034 
1035             // allow an empty array
1036             if ( * curs == ']' )
1037                 break;
1038 
1039             // use scope to invalidate value
1040             {
1041                 JSONValueRef value = parse ( lim, json, curs, depth );
1042                 if ( value == nullptr )
1043                 {
1044                     throw MalformedJSON (
1045                         XP ( XLOC )
1046                         << JSON_EXPECTED ( "',' or ']'", curs )
1047                         );
1048                 }
1049 
1050                 array -> appendValue ( value );
1051 
1052                 if ( array -> count () > lim . array_elem_count )
1053                 {
1054                     throw JSONLimitViolation (
1055                         XP ( XLOC )
1056                         << JSON_LIMIT_VIOLATION ( "array element count", array -> count (), lim . array_elem_count )
1057                         );
1058                 }
1059             }
1060 
1061             // find and skip over ',' and skip any whitespace
1062             // exit loop if no ',' found
1063             if ( ! skip_whitespace ( curs ) || * curs != ',' )
1064                 break;
1065         }
1066 
1067         // must end on ']'
1068         if ( ! curs . isValid () || * curs != ']' )
1069         {
1070             throw MalformedJSON (
1071                 XP ( XLOC )
1072                 << JSON_EXPECTED ( "']'", curs )
1073                 );
1074         }
1075 
1076         // skip over ']'
1077         ++ curs;
1078 
1079         // JSONArray must be valid
1080         assert ( array != nullptr );
1081         return array;
1082     }
1083 
parseObject(const Limits & lim,const String & json,String::Iterator & curs,unsigned int depth)1084     JSONObjectRef JSON :: parseObject ( const Limits & lim, const String & json,
1085         String :: Iterator & curs, unsigned int depth )
1086     {
1087         test_depth ( lim, depth );
1088 
1089         assert ( * curs == '{' );
1090 
1091         JSONObjectRef obj ( new JSONObject () );
1092         while ( 1 )
1093         {
1094             // skip over '{' and any whitespace
1095             // json [ pos ] is '{' or ',', start at json [ pos + 1 ]
1096             if ( ! skip_whitespace ( ++ curs ) )
1097             {
1098                 throw MalformedJSON (
1099                     XP ( XLOC )
1100                     << JSON_EXPECTED ( "'}'", curs )
1101                     );
1102             }
1103 
1104             if ( * curs == '}' )
1105                 break;
1106 
1107             if ( * curs != '"' )
1108             {
1109                 throw MalformedJSON (
1110                     XP ( XLOC )
1111                     << JSON_EXPECTED ( "\"<name>\"", curs )
1112                     );
1113             }
1114 
1115             JSONValueRef name = parseString ( lim, json, curs );
1116 
1117             // skip to ':'
1118             if ( ! skip_whitespace ( curs ) || * curs != ':' )
1119             {
1120                 throw MalformedJSON (
1121                     XP ( XLOC )
1122                     << JSON_EXPECTED ( "':'", curs )
1123                     );
1124             }
1125 
1126             // skip over ':'
1127             ++ curs;
1128 
1129             // get JSON value;
1130             {
1131                 JSONValueRef value = parse ( lim, json, curs, depth );
1132                 if ( value == nullptr )
1133                 {
1134                     throw MalformedJSON (
1135                         XP ( XLOC )
1136                         << JSON_EXPECTED ( "',' or '}'", curs )
1137                         );
1138                 }
1139 
1140                 obj -> addValue ( name -> toString (), value );
1141             }
1142 
1143             if ( obj -> count () > lim . object_mbr_count )
1144             {
1145                 throw JSONLimitViolation (
1146                     XP ( XLOC )
1147                     << JSON_LIMIT_VIOLATION ( "object member count", obj -> count (), lim . object_mbr_count )
1148                     );
1149             }
1150 
1151             // find and skip over ',' and skip any whitespace
1152             // exit loop if no ',' found
1153             if ( ! skip_whitespace ( curs ) || * curs != ',' )
1154                 break;
1155         }
1156 
1157         // must end on '}'
1158         if ( ! curs . isValid () || * curs != '}' )
1159         {
1160             throw MalformedJSON (
1161                 XP ( XLOC )
1162                 << JSON_EXPECTED ( "'}'", curs )
1163                 );
1164         }
1165 
1166         // skip over '}'
1167         ++ curs;
1168 
1169         // JSONObject must be valid
1170         assert ( obj != nullptr );
1171         return obj;
1172     }
1173 
makeParsedNumber(const String & val)1174     JSONValueRef JSON :: makeParsedNumber ( const String & val )
1175     {
1176         // numeric string is already validated,
1177         // so just create the value and wrapper
1178         return JSONValueRef ( new JSONWrapper ( jvt_num, new JSONNumber ( val ) ) );
1179     }
1180 
makeParsedString(const String & val)1181     JSONValueRef JSON :: makeParsedString ( const String & val )
1182     {
1183         // string is already valid/transformed
1184         // just create a value and wrapper
1185         return JSONValueRef ( new JSONWrapper ( jvt_str, new JSONString ( val ) ) );
1186     }
1187 
test_parse(const String & json,bool consume_all)1188     JSONValueRef JSON :: test_parse ( const String & json, bool consume_all )
1189     {
1190         if ( json . isEmpty () )
1191             throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" );
1192 
1193         initLimits ();
1194 
1195         if ( json . size () > default_limits . json_string_size )
1196         {
1197             throw JSONLimitViolation (
1198                 XP ( XLOC )
1199                 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), default_limits . json_string_size )
1200                 );
1201         }
1202 
1203         auto curs = json . makeIterator ();
1204         JSONValueRef val = parse ( default_limits, json, curs, 0 );
1205 
1206         if ( consume_all && skip_whitespace ( curs ) )
1207         {
1208             throw MalformedJSON (
1209                 XP ( XLOC )
1210                 << JSON_TRAILING_BYTES ( curs )
1211                 );
1212         }
1213 
1214         return val;
1215     }
1216 
    // Default constructor: fills in the built-in parsing limits and
    // records, through the file-level "have_limits" flag, that a Limits
    // object has been constructed.
    JSON :: Limits :: Limits ()
        // compared against json . size () by test_parse
        : json_string_size ( 4 * 1024 * 1024 )
        // presumably the bound applied by test_depth () - TODO confirm
        , recursion_depth ( 32 )
        // presumably the longest numeric token accepted - TODO confirm
        , numeral_length ( 256 )
        // compared against the string buffer's size () while parsing a string
        , string_size ( 64 * 1024 )
        // compared against the string buffer's length () while parsing a string
        , string_length ( 64 * 1024 )
        // compared against array -> count () by parseArray
        , array_elem_count ( 4 * 1024 )
        // compared against obj -> count () by parseObject
        , object_mbr_count ( 256 )
    {
        // NOTE(review): presumably consulted by initLimits () - confirm
        have_limits = true;
    }
1228 }
1229 
1230 
1231