1 /*============================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * =========================================================================== 24 * 25 */ 26 27 #include "json-priv.hpp" 28 #include <ncbi/secure/payload.hpp> 29 30 #include <assert.h> 31 #include <ctype.h> 32 //#include <codecvt> 33 #include <locale> 34 #include <errno.h> 35 36 namespace ncbi 37 { 38 static bool have_limits; 39 JSON :: Limits JSON :: default_limits; 40 41 struct JSONError 42 { JSONErrorncbi::JSONError43 JSONError ( const char * _what, const String :: Iterator & _curs ) 44 : what ( _what ) 45 , curs ( _curs ) 46 { 47 } 48 49 const char * what; 50 const String :: Iterator & curs; 51 }; 52 53 static operator <<(XP & xp,const JSONError & x)54 XP & operator << ( XP & xp, const JSONError & x ) 55 { 56 const String :: Iterator & curs = x . curs; 57 xp 58 << curs . charIndex () 59 << ", byte offset " 60 << curs . byteOffset () 61 << ": " 62 << x . what 63 ; 64 return xp; 65 } 66 67 struct JSONExpected 68 { JSONExpectedncbi::JSONExpected69 JSONExpected ( const char * _what, const String :: Iterator & _curs ) 70 : what ( _what ) 71 , curs ( _curs ) 72 { 73 } 74 75 const char * what; 76 const String :: Iterator & curs; 77 }; 78 79 static operator <<(XP & xp,const JSONExpected & x)80 XP & operator << ( XP & xp, const JSONExpected & x ) 81 { 82 const String :: Iterator & curs = x . curs; 83 xp 84 << curs . charIndex () 85 << ", byte offset " 86 << curs . byteOffset () 87 << " expected: " 88 << x . what 89 << " but found '" 90 << curs 91 << '\'' 92 ; 93 if ( curs . isValid () ) 94 { 95 xp 96 << " (" 97 << ( U32 ) * curs 98 << ')' 99 ; 100 } 101 return xp; 102 } 103 104 struct JSONTrailingBytes 105 { JSONTrailingBytesncbi::JSONTrailingBytes106 JSONTrailingBytes ( const String :: Iterator & _curs ) 107 : curs ( _curs ) 108 { 109 } 110 111 const String :: Iterator & curs; 112 }; 113 114 static operator <<(XP & xp,const JSONTrailingBytes & x)115 XP & operator << ( XP & xp, const JSONTrailingBytes & x ) 116 { 117 const String :: Iterator & curs = x . curs; 118 xp 119 << curs . charIndex () 120 << ", byte offset " 121 << curs . byteOffset () 122 ; 123 return xp; 124 } 125 126 // Without the leading string or other parameter for 127 // which there is an operator defined within the XP class, 128 // C++ has difficulty associating a global "operator <<". 129 // Making this into a macro to force a leading expression 130 // gets past the problem. 131 #define JSON_ERROR( what, curs ) \ 132 "JSON: at character index " << JSONError ( what, curs ) 133 #define JSON_EXPECTED( what, curs ) \ 134 "JSON: at character index " << JSONExpected ( what, curs ) 135 #define JSON_TRAILING_BYTES( curs ) \ 136 "JSON: trailing bytes in text at character index " << JSONTrailingBytes ( curs ) 137 #define JSON_LIMIT_VIOLATION( what, actual, lim ) \ 138 what << " ( " << actual << " ) exceeds allowed limit ( " << lim << " )" 139 double_to_string(long double val,unsigned int precision)140 String double_to_string ( long double val, unsigned int precision ) 141 { 142 // TBD - come up with a more precise cutoff 143 if ( precision > 40 ) 144 precision = 40; 145 146 char buffer [ 1024 ]; 147 int len = std :: snprintf ( buffer, sizeof buffer, "%.*Lg", precision, val ); 148 if ( len < 0 ) 149 { 150 int status = errno; 151 throw JSONInternalError ( 152 XP ( XLOC ) 153 << "failed to convert long double to string - " 154 << syserr ( status ) 155 ); 156 } 157 else if ( ( size_t ) len >= sizeof buffer ) 158 { 159 throw JSONInternalError ( 160 XP ( XLOC ) 161 << "failed to convert long double to string - " 162 << "1024-byte buffer was insufficient" 163 ); 164 } 165 166 return String ( buffer, len ); 167 } 168 169 // skip whitespace 170 // adjust the cursor position until it reaches end or something other than whitespace 171 // returns true if text found ( not end of text ) 172 static skip_whitespace(String::Iterator & curs)173 bool skip_whitespace ( String :: Iterator & curs ) 174 { 175 for ( ; curs . isValid (); ++ curs ) 176 { 177 if ( ! iswspace ( * curs ) ) 178 return true; 179 } 180 181 return false; 182 } 183 184 static htoi(UTF32 h)185 unsigned int htoi ( UTF32 h ) 186 { 187 assert ( iswxdigit ( h ) ); 188 if ( iswdigit ( h ) ) 189 return h - '0'; 190 return towupper ( h ) - 'A' + 10; 191 } 192 193 static htoi(UTF32 h1,UTF32 h2)194 unsigned int htoi ( UTF32 h1, UTF32 h2 ) 195 { 196 return ( htoi ( h1 ) << 4 ) | htoi ( h2 ); 197 } 198 199 static htoi(UTF32 h1,UTF32 h2,UTF32 h3,UTF32 h4)200 unsigned int htoi ( UTF32 h1, UTF32 h2, UTF32 h3, UTF32 h4 ) 201 { 202 return ( htoi ( h1, h2 ) << 8 ) | htoi ( h3, h4 ); 203 } 204 205 static hex_to_utf8(const JSON::Limits & lim,String::Iterator & curs,const String::Iterator & start)206 String hex_to_utf8 ( const JSON :: Limits & lim, String :: Iterator & curs, 207 const String :: Iterator & start ) 208 { 209 assert ( start . isValid () ); 210 assert ( curs . isValid () ); 211 212 Payload pay ( 256 ); 213 UTF16 * buffer = ( UTF16 * ) pay . data (); 214 count_t bend = pay . capacity () / sizeof buffer [ 0 ]; 215 count_t bidx = 0; 216 217 unsigned int start_offset = ( unsigned int ) start . byteOffset (); 218 unsigned int start_index = ( unsigned int ) start . charIndex (); 219 220 while ( curs . isValid () ) 221 { 222 size_t str_size = curs . byteOffset () - start_offset; 223 if ( ( unsigned int ) str_size >= lim . string_size ) 224 { 225 throw JSONLimitViolation ( 226 XP ( XLOC ) 227 << JSON_LIMIT_VIOLATION ( "string size", str_size, lim . string_size ) 228 ); 229 } 230 231 count_t str_length = curs . charIndex () - start_index; 232 if ( ( unsigned int ) str_length >= lim . string_length ) 233 { 234 throw JSONLimitViolation ( 235 XP ( XLOC ) 236 << JSON_LIMIT_VIOLATION ( "string length", str_length, lim . string_length ) 237 ); 238 } 239 240 auto save = curs; 241 try 242 { 243 // look for escape 244 if ( * curs != '\\' ) 245 break; 246 247 // look for 'u' 248 if ( * ++ curs != 'u' ) 249 { 250 -- curs; 251 break; 252 } 253 254 // no turning back now. 255 // expect 4 hex digits 256 UTF32 h1 = * ++ curs; 257 if ( ! iswxdigit ( h1 ) ) 258 { 259 throw MalformedJSON ( 260 XP ( XLOC ) 261 << JSON_EXPECTED ( "hex digit", curs ) 262 ); 263 } 264 265 UTF32 h2 = * ++ curs; 266 if ( ! iswxdigit ( h2 ) ) 267 { 268 throw MalformedJSON ( 269 XP ( XLOC ) 270 << JSON_EXPECTED ( "hex digit", curs ) 271 ); 272 } 273 274 UTF32 h3 = * ++ curs; 275 if ( ! iswxdigit ( h3 ) ) 276 { 277 throw MalformedJSON ( 278 XP ( XLOC ) 279 << JSON_EXPECTED ( "hex digit", curs ) 280 ); 281 } 282 283 UTF32 h4 = * ++ curs; 284 if ( ! iswxdigit ( h4 ) ) 285 { 286 throw MalformedJSON ( 287 XP ( XLOC ) 288 << JSON_EXPECTED ( "hex digit", curs ) 289 ); 290 } 291 292 ++ curs; 293 294 // check buffer capacity 295 if ( bidx == bend ) 296 { 297 pay . increaseCapacity (); 298 buffer = ( UTF16 * ) pay . data (); 299 bend = pay . capacity () / sizeof buffer [ 0 ]; 300 } 301 302 // convert to a UTF-16 character 303 buffer [ bidx ++ ] = htoi ( h1, h2, h3, h4 ); 304 305 // record size update 306 pay . setSize ( bidx * sizeof buffer [ 0 ] ); 307 308 } 309 catch ( BoundsException & x ) 310 { 311 curs = save; 312 throw MalformedJSON ( 313 XP ( XLOC ) 314 << JSON_ERROR ( "bad escape sequence", curs ) 315 ); 316 } 317 catch ( ... ) 318 { 319 curs = save; 320 throw; 321 } 322 } 323 324 // convert UTF-16 to UTF-8 325 return String ( buffer, bidx ); 326 } 327 328 static test_depth(const JSON::Limits & lim,unsigned int & depth)329 void test_depth ( const JSON :: Limits & lim, unsigned int & depth ) 330 { 331 if ( ++ depth > lim . recursion_depth ) 332 { 333 throw JSONLimitViolation ( 334 XP ( XLOC ) 335 << JSON_LIMIT_VIOLATION ( "parsing recursion depth", depth, lim . recursion_depth ) 336 ); 337 } 338 } 339 parse(const String & json)340 JSONValueRef JSON :: parse ( const String & json ) 341 { 342 // parse with default limits 343 initLimits (); 344 return parse ( default_limits, json ); 345 } 346 parse(const Limits & lim,const String & json)347 JSONValueRef JSON :: parse ( const Limits & lim, const String & json ) 348 { 349 // refuse empty JSON 350 if ( json . isEmpty () ) 351 throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" ); 352 353 // check up front if it is huge 354 if ( json . size () > ( size_t ) lim . json_string_size ) 355 { 356 throw JSONLimitViolation ( 357 XP ( XLOC ) 358 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), lim . json_string_size ) 359 ); 360 } 361 362 // walk across string with iterator 363 auto curs = json . makeIterator (); 364 365 // all whitespace is as good as empty 366 if ( ! skip_whitespace ( curs ) ) 367 { 368 throw MalformedJSON ( 369 XP ( XLOC ) 370 << JSON_EXPECTED ( "'{' or '['", curs ) 371 ); 372 } 373 374 // return value 375 JSONValueRef val; 376 switch ( * curs ) 377 { 378 case '{': 379 val = parseObject ( lim, json, curs, 0 ) . release (); 380 break; 381 case '[': 382 val = parseArray ( lim, json, curs, 0 ) . release (); 383 break; 384 case '"': 385 val = parseString ( lim, json, curs ); 386 break; 387 case 'f': 388 case 't': 389 val = parseBoolean ( curs ); 390 break; 391 case '-': 392 val = parseNumber ( lim, json, curs ); 393 break; 394 case 'n': 395 val = parseNull ( curs ); 396 break; 397 default: 398 if ( iswdigit ( * curs ) ) 399 { 400 val = parseNumber ( lim, json, curs ); 401 break; 402 } 403 404 throw MalformedJSON ( 405 XP ( XLOC ) 406 << JSON_EXPECTED ( "'{' or '['", curs ) 407 ); 408 } 409 410 // reject if any trailing characters are left 411 if ( skip_whitespace ( curs ) ) 412 { 413 throw MalformedJSON ( 414 XP ( XLOC ) 415 << JSON_TRAILING_BYTES ( curs ) 416 ); 417 } 418 419 return val; 420 } 421 parseArray(const String & json)422 JSONArrayRef JSON :: parseArray ( const String & json ) 423 { 424 // parse with default limits 425 initLimits (); 426 return parseArray ( default_limits, json ); 427 } 428 parseArray(const Limits & lim,const String & json)429 JSONArrayRef JSON :: parseArray ( const Limits & lim, const String & json ) 430 { 431 // refuse empty JSON 432 if ( json . isEmpty () ) 433 throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" ); 434 435 // check up front if it is huge 436 if ( json . size () > ( size_t ) lim . json_string_size ) 437 { 438 throw JSONLimitViolation ( 439 XP ( XLOC ) 440 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), lim . json_string_size ) 441 ); 442 } 443 444 // walk across string with iterator 445 auto curs = json . makeIterator (); 446 447 // all whitespace is as good as empty 448 if ( ! skip_whitespace ( curs ) ) 449 { 450 throw MalformedJSON ( 451 XP ( XLOC ) 452 << JSON_EXPECTED ( "'['", curs ) 453 ); 454 } 455 456 // return array 457 JSONArrayRef array; 458 switch ( * curs ) 459 { 460 case '[': 461 array = parseArray ( lim, json, curs, 0 ); 462 break; 463 default: 464 throw MalformedJSON ( 465 XP ( XLOC ) 466 << JSON_EXPECTED ( "'['", curs ) 467 ); 468 } 469 470 // reject if any trailing characters are left 471 if ( skip_whitespace ( curs ) ) 472 { 473 throw MalformedJSON ( 474 XP ( XLOC ) 475 << JSON_TRAILING_BYTES ( curs ) 476 ); 477 } 478 479 return array; 480 } 481 parseObject(const String & json)482 JSONObjectRef JSON :: parseObject ( const String & json ) 483 { 484 // parse with default limits 485 initLimits (); 486 return parseObject ( default_limits, json ); 487 } 488 parseObject(const Limits & lim,const String & json)489 JSONObjectRef JSON :: parseObject ( const Limits & lim, const String & json ) 490 { 491 // refuse empty JSON 492 if ( json . isEmpty () ) 493 throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" ); 494 495 // check up front if it is huge 496 if ( json . size () > ( size_t ) lim . json_string_size ) 497 { 498 throw JSONLimitViolation ( 499 XP ( XLOC ) 500 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), lim . json_string_size ) 501 ); 502 } 503 504 // walk across string with iterator 505 auto curs = json . makeIterator (); 506 507 // all whitespace is as good as empty 508 if ( ! skip_whitespace ( curs ) ) 509 { 510 throw MalformedJSON ( 511 XP ( XLOC ) 512 << JSON_EXPECTED ( "'{'", curs ) 513 ); 514 } 515 516 // return object 517 JSONObjectRef obj; 518 switch ( * curs ) 519 { 520 case '{': 521 obj = parseObject ( lim, json, curs, 0 ); 522 break; 523 default: 524 throw MalformedJSON ( 525 XP ( XLOC ) 526 << JSON_EXPECTED ( "'{'", curs ) 527 ); 528 } 529 530 // reject if any trailing characters are left 531 if ( skip_whitespace ( curs ) ) 532 { 533 throw MalformedJSON ( 534 XP ( XLOC ) 535 << JSON_TRAILING_BYTES ( curs ) 536 ); 537 } 538 539 return obj; 540 } 541 makeNull()542 JSONValueRef JSON :: makeNull () 543 { 544 // create a wrapper with no value 545 return JSONValueRef ( new JSONWrapper ( jvt_null ) ); 546 } 547 makeBoolean(bool val)548 JSONValueRef JSON :: makeBoolean ( bool val ) 549 { 550 // create a wrapper with a Boolean value 551 return JSONValueRef ( new JSONWrapper ( jvt_bool, new JSONBoolean ( val ) ) ); 552 } 553 makeNumber(const String & val)554 JSONValueRef JSON :: makeNumber ( const String & val ) 555 { 556 // parse a number from a string using default limits 557 initLimits (); 558 auto curs = val . makeIterator (); 559 return parseNumber ( default_limits, val, curs ); 560 } 561 makeInteger(long long int val)562 JSONValueRef JSON :: makeInteger ( long long int val ) 563 { 564 // make an integer from binary value 565 return JSONValueRef ( new JSONWrapper ( jvt_int, new JSONInteger ( val ) ) ); 566 } 567 makeDouble(long double val,unsigned int precision)568 JSONValueRef JSON :: makeDouble ( long double val, unsigned int precision ) 569 { 570 // make a textual number from binary value and precision 571 return makeParsedNumber ( double_to_string ( val, precision ) ); 572 } 573 makeString(const String & str)574 JSONValueRef JSON :: makeString ( const String & str ) 575 { 576 initLimits (); 577 578 // check string size limit 579 if ( str . size () > default_limits . string_size ) 580 { 581 throw JSONLimitViolation ( 582 XP ( XLOC ) 583 << JSON_LIMIT_VIOLATION ( "string size", str . size (), default_limits . string_size ) 584 ); 585 } 586 587 // check string length limit 588 if ( str . length () > default_limits . string_length ) 589 { 590 throw JSONLimitViolation ( 591 XP ( XLOC ) 592 << JSON_LIMIT_VIOLATION ( "string length", str . length (), default_limits . string_length ) 593 ); 594 } 595 596 // make a string from outside 597 return makeParsedString ( str ); 598 } 599 makeArray()600 JSONArrayRef JSON :: makeArray () 601 { 602 // make a new empty array 603 return JSONArrayRef ( new JSONArray () ); 604 } 605 makeObject()606 JSONObjectRef JSON :: makeObject () 607 { 608 // make a new empty object 609 return JSONObjectRef ( new JSONObject () ); 610 } 611 initLimits()612 void JSON :: initLimits () 613 { 614 if ( ! have_limits ) 615 default_limits = Limits (); 616 } 617 parse(const Limits & lim,const String & json,String::Iterator & curs,unsigned int depth)618 JSONValueRef JSON :: parse ( const Limits & lim, const String & json, 619 String :: Iterator & curs, unsigned int depth ) 620 { 621 // parse potentially empty text 622 if ( skip_whitespace ( curs ) ) 623 { 624 switch ( * curs ) 625 { 626 case '{': 627 return parseObject ( lim, json, curs, depth ) . release (); 628 case '[': 629 return parseArray ( lim, json, curs, depth ) . release (); 630 case '"': 631 return parseString ( lim, json, curs ); 632 case 'f': 633 case 't': 634 return parseBoolean ( curs ); 635 case '-': 636 return parseNumber ( lim, json, curs ); 637 case 'n': 638 return parseNull ( curs ); 639 default: 640 if ( iswdigit ( * curs ) ) 641 return parseNumber ( lim, json, curs ); 642 643 // garbage 644 throw MalformedJSON ( 645 XP ( XLOC ) 646 << JSON_EXPECTED ( "'{' or '[' or '\"' or 'true' or 'false' or number", curs ) 647 ); 648 } 649 } 650 651 // this is where we need to check returns for nullptr 652 return JSONValueRef ( nullptr ); 653 } 654 parseNull(String::Iterator & curs)655 JSONValueRef JSON :: parseNull ( String :: Iterator & curs ) 656 { 657 do 658 { 659 try 660 { 661 // spell 'n' 'u' 'l' 'l' one character at a time 662 assert ( * curs == 'n' ); 663 if ( * ++ curs != 'u' ) 664 break; 665 if ( * ++ curs != 'l' ) 666 break; 667 if ( * ++ curs != 'l' ) 668 break; 669 ++ curs; 670 671 // ensure that it doesn't appear to continue 672 if ( curs . isValid () && iswalnum ( * curs ) ) 673 break; 674 675 // make a null element 676 return makeNull (); 677 } 678 catch ( ... ) 679 { 680 } 681 682 } 683 while ( false ); 684 685 // bad JSON 686 throw MalformedJSON ( 687 XP ( XLOC ) 688 << JSON_EXPECTED ( "keyword 'null'", curs ) 689 ) ; 690 } 691 parseBoolean(String::Iterator & curs)692 JSONValueRef JSON :: parseBoolean ( String :: Iterator & curs ) 693 { 694 bool which = false; 695 do 696 { 697 try 698 { 699 if ( * curs == 't' ) 700 { 701 which = true; 702 if ( * ++ curs != 'r' ) 703 break; 704 if ( * ++ curs != 'u' ) 705 break; 706 if ( * ++ curs != 'e' ) 707 break; 708 } 709 else 710 { 711 which = false; 712 assert ( * curs == 'f' ); 713 if ( * ++ curs != 'a' ) 714 break; 715 if ( * ++ curs != 'l' ) 716 break; 717 if ( * ++ curs != 's' ) 718 break; 719 if ( * ++ curs != 'e' ) 720 break; 721 } 722 723 ++ curs; 724 725 // ensure that it doesn't appear to continue 726 if ( curs . isValid () && iswalnum ( * curs ) ) 727 break; 728 729 return makeBoolean ( which ); 730 } 731 catch ( ... ) 732 { 733 } 734 735 } 736 while ( false ); 737 738 // bad JSON 739 const char * what = which ? 740 "keyword 'true'" : "keyword 'false'"; 741 throw MalformedJSON ( 742 XP ( XLOC ) 743 << JSON_EXPECTED ( what, curs ) 744 ) ; 745 } 746 parseNumber(const Limits & lim,const String & json,String::Iterator & curs)747 JSONValueRef JSON :: parseNumber ( const Limits & lim, 748 const String & json, String :: Iterator & curs ) 749 { 750 assert ( iswdigit ( * curs ) || * curs == '-' ); 751 752 // record starting position within string 753 count_t start = curs . charIndex (); 754 755 // skip over negation 756 if ( * curs == '-' ) 757 ++ curs; 758 759 if ( ! curs . isValid () || ! iswdigit ( * curs ) ) 760 { 761 throw MalformedJSON ( 762 XP ( XLOC ) 763 << JSON_EXPECTED ( "digit", curs ) 764 ); 765 } 766 767 // check for 0 768 if ( * curs == '0' ) 769 ++ curs; 770 else 771 { 772 // we know from the tests above that val is 1..9 773 assert ( * curs >= '1' && * curs <= '9' ); 774 ++ curs; 775 776 // just find the end of the number 777 while ( curs . isValid () && iswdigit ( * curs ) ) 778 ++ curs; 779 } 780 781 // declare a cursor to peek ahead 782 auto peek = curs; 783 784 bool is_float = false; 785 if ( peek . isValid () ) 786 { 787 switch ( * peek ) 788 { 789 case '.': 790 { 791 // skip digits in search of float indicator 792 ++ peek; 793 while ( peek . isValid () && iswdigit ( * peek ) ) 794 { 795 ++ peek; 796 is_float = true; 797 } 798 799 // must have at least one digit 800 if ( ! is_float ) 801 break; // we have an integer 802 803 // if a character other than was [eE] found, break 804 if ( peek . isValid () && towupper ( * peek ) != 'E' ) 805 break; 806 807 // no break - we have an [eE], fall through 808 } 809 case 'E': 810 case 'e': 811 { 812 ++ peek; 813 if ( peek . isValid () ) 814 { 815 switch ( * peek ) 816 { 817 case '+': 818 case '-': 819 ++ peek; 820 break; 821 } 822 } 823 824 while ( peek . isValid () && iswdigit ( * peek ) ) 825 { 826 ++ peek; 827 is_float = true; 828 } 829 830 break; 831 }} 832 } 833 834 // update curs if we found floating point 835 if ( is_float ) 836 curs = peek; 837 838 // check the number of total characters 839 count_t num_length = curs . charIndex () - start; 840 if ( num_length > lim . numeral_length ) 841 { 842 throw JSONLimitViolation ( 843 XP ( XLOC ) 844 << JSON_LIMIT_VIOLATION ( "numeral length", num_length, lim . numeral_length ) 845 ); 846 } 847 848 // this is the numeric string 849 String num_str = json . subString ( start, curs . charIndex () - start ); 850 if ( ! is_float ) 851 { 852 try 853 { 854 // try to convert it to a binary integer 855 long long int num = decToLongLongInteger ( num_str ); 856 return makeInteger ( num ); 857 } 858 catch ( OverflowException & x ) 859 { 860 // too big - fall out 861 } 862 } 863 864 // keep it as a string 865 return makeParsedNumber ( num_str ); 866 } 867 parseString(const Limits & lim,const String & json,String::Iterator & curs)868 JSONValueRef JSON :: parseString ( const Limits & lim, 869 const String & json, String :: Iterator & curs ) 870 { 871 assert ( * curs == '"' ); 872 873 // accumulate text here 874 StringBuffer sb; 875 876 // mark the start of the string 877 String :: Iterator start ( ++ curs ); 878 879 // a look-ahead 880 String :: Iterator delim = curs; 881 882 // Find ending '"' or escaped characters 883 if ( ! delim . findFirstOf ( "\\\"" ) ) 884 { 885 throw MalformedJSON ( 886 XP ( XLOC ) 887 << JSON_ERROR ( "unterminated string", delim ) 888 ); 889 } 890 891 while ( 1 ) 892 { 893 // add everything before the delimiter to the new string 894 size_t proj_size = sb . size () + ( delim . byteOffset () - curs . byteOffset () ); 895 if ( proj_size > lim . string_size ) 896 { 897 throw JSONLimitViolation ( 898 XP ( XLOC ) 899 << JSON_LIMIT_VIOLATION ( "string size", proj_size, lim . string_size ) 900 ); 901 } 902 903 count_t proj_len = sb . length () + ( delim - curs ); 904 if ( proj_len > lim . string_length ) 905 { 906 throw JSONLimitViolation ( 907 XP ( XLOC ) 908 << JSON_LIMIT_VIOLATION ( "string length", proj_len, lim . string_length ) 909 ); 910 } 911 912 sb += json . subString ( curs . charIndex (), delim - curs ); 913 curs = delim; 914 915 // found end of string 916 if ( * curs != '\\' ) 917 break; 918 919 // found '\' 920 bool advance = true; 921 switch ( * ++ curs ) 922 { 923 case '"': 924 sb += '"'; 925 break; 926 case '\\': 927 sb += '\\'; 928 break; 929 case '/': 930 sb += '/'; 931 break; 932 case 'b': 933 sb += '\b'; 934 break; 935 case 'f': 936 sb += '\f'; 937 break; 938 case 'n': 939 sb += '\n'; 940 break; 941 case 'r': 942 sb += '\r'; 943 break; 944 case 't': 945 sb += '\t'; 946 break; 947 case 'u': 948 { 949 // back up to escape for regular pattern 950 -- curs; 951 String utf8 = hex_to_utf8 ( lim, curs, start ); 952 sb += utf8; 953 954 // prepare for skip ahead below 955 advance = false; 956 break; 957 } 958 959 default: 960 -- curs; 961 throw MalformedJSON ( 962 XP ( XLOC ) 963 << JSON_ERROR ( "Invalid escape character", curs ) 964 << ' ' 965 << curs 966 << "' (" 967 << ( U32 ) * curs 968 << ')' 969 ); 970 } 971 972 // skip escaped character 973 if ( advance ) 974 ++ curs; 975 976 // Find ending '"' or control characters 977 if ( ! ( delim = curs ) . findFirstOf ( "\\\"" ) ) 978 { 979 throw MalformedJSON ( 980 XP ( XLOC ) 981 << JSON_ERROR ( "unterminated string", delim ) 982 ); 983 } 984 } 985 986 // being here should mean that we had a break above 987 // on the line looking for a backslash 988 assert ( delim == curs ); 989 990 // because "esc" is identical to "curs" 991 // and because "esc" found either a backslash or quote, 992 // and because backslash kept us in the loop, we know 993 // that the current character must be a closing quote. 994 assert ( * curs == '"' ); 995 996 // set pos to point to next token 997 ++ curs; 998 999 if ( sb . size () > lim . string_size ) 1000 { 1001 throw JSONLimitViolation ( 1002 XP ( XLOC ) 1003 << JSON_LIMIT_VIOLATION ( "string size", sb . size (), lim . string_size ) 1004 ); 1005 } 1006 if ( sb . length () > lim . string_length ) 1007 { 1008 throw JSONLimitViolation ( 1009 XP ( XLOC ) 1010 << JSON_LIMIT_VIOLATION ( "string length", sb . length (), lim . string_length ) 1011 ); 1012 } 1013 1014 return makeParsedString ( sb . stealString () ); 1015 } 1016 parseArray(const Limits & lim,const String & json,String::Iterator & curs,unsigned int depth)1017 JSONArrayRef JSON :: parseArray ( const Limits & lim, const String & json, 1018 String :: Iterator & curs, unsigned int depth ) 1019 { 1020 assert ( * curs == '[' ); 1021 1022 JSONArrayRef array ( new JSONArray () ); 1023 while ( 1 ) 1024 { 1025 // skip over '[' and any whitespace 1026 // * curs is known to be '[' or ',' 1027 if ( ! skip_whitespace ( ++ curs ) ) 1028 { 1029 throw MalformedJSON ( 1030 XP ( XLOC ) 1031 << JSON_EXPECTED ( "']'", curs ) 1032 ); 1033 } 1034 1035 // allow an empty array 1036 if ( * curs == ']' ) 1037 break; 1038 1039 // use scope to invalidate value 1040 { 1041 JSONValueRef value = parse ( lim, json, curs, depth ); 1042 if ( value == nullptr ) 1043 { 1044 throw MalformedJSON ( 1045 XP ( XLOC ) 1046 << JSON_EXPECTED ( "',' or ']'", curs ) 1047 ); 1048 } 1049 1050 array -> appendValue ( value ); 1051 1052 if ( array -> count () > lim . array_elem_count ) 1053 { 1054 throw JSONLimitViolation ( 1055 XP ( XLOC ) 1056 << JSON_LIMIT_VIOLATION ( "array element count", array -> count (), lim . array_elem_count ) 1057 ); 1058 } 1059 } 1060 1061 // find and skip over ',' and skip any whitespace 1062 // exit loop if no ',' found 1063 if ( ! skip_whitespace ( curs ) || * curs != ',' ) 1064 break; 1065 } 1066 1067 // must end on ']' 1068 if ( ! curs . isValid () || * curs != ']' ) 1069 { 1070 throw MalformedJSON ( 1071 XP ( XLOC ) 1072 << JSON_EXPECTED ( "']'", curs ) 1073 ); 1074 } 1075 1076 // skip over ']' 1077 ++ curs; 1078 1079 // JSONArray must be valid 1080 assert ( array != nullptr ); 1081 return array; 1082 } 1083 parseObject(const Limits & lim,const String & json,String::Iterator & curs,unsigned int depth)1084 JSONObjectRef JSON :: parseObject ( const Limits & lim, const String & json, 1085 String :: Iterator & curs, unsigned int depth ) 1086 { 1087 test_depth ( lim, depth ); 1088 1089 assert ( * curs == '{' ); 1090 1091 JSONObjectRef obj ( new JSONObject () ); 1092 while ( 1 ) 1093 { 1094 // skip over '{' and any whitespace 1095 // json [ pos ] is '{' or ',', start at json [ pos + 1 ] 1096 if ( ! skip_whitespace ( ++ curs ) ) 1097 { 1098 throw MalformedJSON ( 1099 XP ( XLOC ) 1100 << JSON_EXPECTED ( "'}'", curs ) 1101 ); 1102 } 1103 1104 if ( * curs == '}' ) 1105 break; 1106 1107 if ( * curs != '"' ) 1108 { 1109 throw MalformedJSON ( 1110 XP ( XLOC ) 1111 << JSON_EXPECTED ( "\"<name>\"", curs ) 1112 ); 1113 } 1114 1115 JSONValueRef name = parseString ( lim, json, curs ); 1116 1117 // skip to ':' 1118 if ( ! skip_whitespace ( curs ) || * curs != ':' ) 1119 { 1120 throw MalformedJSON ( 1121 XP ( XLOC ) 1122 << JSON_EXPECTED ( "':'", curs ) 1123 ); 1124 } 1125 1126 // skip over ':' 1127 ++ curs; 1128 1129 // get JSON value; 1130 { 1131 JSONValueRef value = parse ( lim, json, curs, depth ); 1132 if ( value == nullptr ) 1133 { 1134 throw MalformedJSON ( 1135 XP ( XLOC ) 1136 << JSON_EXPECTED ( "',' or '}'", curs ) 1137 ); 1138 } 1139 1140 obj -> addValue ( name -> toString (), value ); 1141 } 1142 1143 if ( obj -> count () > lim . object_mbr_count ) 1144 { 1145 throw JSONLimitViolation ( 1146 XP ( XLOC ) 1147 << JSON_LIMIT_VIOLATION ( "object member count", obj -> count (), lim . object_mbr_count ) 1148 ); 1149 } 1150 1151 // find and skip over ',' and skip any whitespace 1152 // exit loop if no ',' found 1153 if ( ! skip_whitespace ( curs ) || * curs != ',' ) 1154 break; 1155 } 1156 1157 // must end on '}' 1158 if ( ! curs . isValid () || * curs != '}' ) 1159 { 1160 throw MalformedJSON ( 1161 XP ( XLOC ) 1162 << JSON_EXPECTED ( "'}'", curs ) 1163 ); 1164 } 1165 1166 // skip over '}' 1167 ++ curs; 1168 1169 // JSONObject must be valid 1170 assert ( obj != nullptr ); 1171 return obj; 1172 } 1173 makeParsedNumber(const String & val)1174 JSONValueRef JSON :: makeParsedNumber ( const String & val ) 1175 { 1176 // numeric string is already validated, 1177 // so just create the value and wrapper 1178 return JSONValueRef ( new JSONWrapper ( jvt_num, new JSONNumber ( val ) ) ); 1179 } 1180 makeParsedString(const String & val)1181 JSONValueRef JSON :: makeParsedString ( const String & val ) 1182 { 1183 // string is already valid/transformed 1184 // just create a value and wrapper 1185 return JSONValueRef ( new JSONWrapper ( jvt_str, new JSONString ( val ) ) ); 1186 } 1187 test_parse(const String & json,bool consume_all)1188 JSONValueRef JSON :: test_parse ( const String & json, bool consume_all ) 1189 { 1190 if ( json . isEmpty () ) 1191 throw MalformedJSON ( XP ( XLOC ) << "Empty JSON source" ); 1192 1193 initLimits (); 1194 1195 if ( json . size () > default_limits . json_string_size ) 1196 { 1197 throw JSONLimitViolation ( 1198 XP ( XLOC ) 1199 << JSON_LIMIT_VIOLATION ( "JSON source size", json . size (), default_limits . json_string_size ) 1200 ); 1201 } 1202 1203 auto curs = json . makeIterator (); 1204 JSONValueRef val = parse ( default_limits, json, curs, 0 ); 1205 1206 if ( consume_all && skip_whitespace ( curs ) ) 1207 { 1208 throw MalformedJSON ( 1209 XP ( XLOC ) 1210 << JSON_TRAILING_BYTES ( curs ) 1211 ); 1212 } 1213 1214 return val; 1215 } 1216 Limits()1217 JSON :: Limits :: Limits () 1218 : json_string_size ( 4 * 1024 * 1024 ) 1219 , recursion_depth ( 32 ) 1220 , numeral_length ( 256 ) 1221 , string_size ( 64 * 1024 ) 1222 , string_length ( 64 * 1024 ) 1223 , array_elem_count ( 4 * 1024 ) 1224 , object_mbr_count ( 256 ) 1225 { 1226 have_limits = true; 1227 } 1228 } 1229 1230 1231