1<?php 2/* 3** Zabbix 4** Copyright (C) 2001-2021 Zabbix SIA 5** 6** This program is free software; you can redistribute it and/or modify 7** it under the terms of the GNU General Public License as published by 8** the Free Software Foundation; either version 2 of the License, or 9** (at your option) any later version. 10** 11** This program is distributed in the hope that it will be useful, 12** but WITHOUT ANY WARRANTY; without even the implied warranty of 13** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14** GNU General Public License for more details. 15** 16** You should have received a copy of the GNU General Public License 17** along with this program; if not, write to the Free Software 18** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19**/ 20 21 22/** 23** Class for wrapping JSON encoding/decoding functionality. 24** 25** @ MOD from package Solar_Json <solarphp.com> 26** 27** @author Michal Migurski <mike-json@teczno.com> 28** @author Matt Knapp <mdknapp[at]gmail[dot]com> 29** @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com> 30** @author Clay Loveless <clay@killersoft.com> 31** @modified by Artem Suharev <aly@zabbix.com> 32** 33** @license http://opensource.org/licenses/bsd-license.php BSD 34**/ 35class CJson { 36 37 /** 38 * 39 * User-defined configuration, primarily of use in unit testing. 40 * 41 * Keys are ... 42 * 43 * `bypass_ext` 44 * : (bool) Flag to instruct Solar_Json to bypass 45 * native json extension, if installed. 46 * 47 * `bypass_mb` 48 * : (bool) Flag to instruct Solar_Json to bypass 49 * native mb_convert_encoding() function, if 50 * installed. 51 * 52 * `noerror` 53 * : (bool) Flag to instruct Solar_Json to return null 54 * for values it cannot encode rather than throwing 55 * an exception (PHP-only encoding) or PHP warnings 56 * (native json_encode() function). 
/**
 * User-defined configuration, primarily of use in unit testing.
 *
 * Keys:
 *   'bypass_ext' (bool) Skip the native json extension even when it is available.
 *   'bypass_mb'  (bool) Skip mb_convert_encoding() even when it is available.
 *   'noerror'    (bool) Make the PHP-only encoder return 'null' for values it cannot encode
 *                instead of throwing; with the native encoder the error reporting level is
 *                switched for the duration of the json_encode() call.
 *
 * @var array
 */
protected $_config = [
	'bypass_ext' => false,
	'bypass_mb' => false,
	'noerror' => false
];

/**
 * Marker constants for use in _json_decode().
 *
 * SLICE, IN_STR, IN_ARR, IN_OBJ and IN_CMT tag the entries of the scanner stack; IN_ARR and IN_OBJ are
 * additionally stored in $_level.
 */
const SLICE = 1;
const IN_STR = 2;
const IN_ARR = 3;
const IN_OBJ = 4;
const IN_CMT = 5;

/**
 * Nesting marker for _json_decode(): scalars (booleans, numbers, strings) are only returned while this
 * holds self::IN_ARR or self::IN_OBJ; otherwise they decode to null.
 *
 * @var int
 */
protected $_level;

/**
 * Last error of $this->decode() method (one of the JSON_ERROR_* constants).
 *
 * @var int
 */
protected $last_error;

/**
 * Constructor.
 *
 * Applies the optional configuration overrides, builds the lookup tables read by isValid() and resets
 * the last decode error.
 *
 * @param mixed $config  Optional array of overrides for $_config. Only keys that already exist in
 *                       $_config are applied; unknown keys and non-array values are ignored.
 */
public function __construct($config = null) {
	if (is_array($config)) {
		// Keep the set of configuration keys fixed: take only overrides for known options.
		$this->_config = array_replace($this->_config, array_intersect_key($config, $this->_config));
	}

	$this->_mapAscii();
	$this->_setStateTransitionTable();

	$this->last_error = JSON_ERROR_NONE;
}

/**
 * Default destructor; does nothing other than provide a safe fallback
 * for calls to parent::__destruct().
 *
 * @return void
 */
public function __destruct() {
}

/**
 * Used by the PHP-only _json_encode() fallback: true when every array has to be emitted as a JSON
 * object, null otherwise. Set by encode() right before the fallback encoder runs.
 */
private static $forceObject = null;
/**
 * Encodes the mixed $valueToEncode into JSON format.
 *
 * The native json_encode() is used unless 'bypass_ext' is set or the extension is missing; otherwise
 * the PHP-only _json_encode() is used.
 *
 * @param mixed $valueToEncode  Value to be encoded into JSON format.
 * @param array $deQuote        Array of keys whose values should **not** be quoted in encoded string.
 * @param bool  $forceObject    Force all arrays to objects.
 *
 * @return string  JSON encoded value (whatever json_encode() returns when the native path is taken).
 */
public function encode($valueToEncode, $deQuote = [], $forceObject = false) {
	if (!$this->_config['bypass_ext'] && function_exists('json_encode') && defined('JSON_FORCE_OBJECT')) {
		if ($this->_config['noerror']) {
			// NOTE(review): E_ERROR ^ E_WARNING evaluates to E_ERROR | E_WARNING, so warnings stay
			// enabled here - confirm whether suppressing them was the intent.
			$old_errlevel = error_reporting(E_ERROR ^ E_WARNING);
		}

		// The flags argument is an integer bit mask; 0 means "no flags" (null is not a valid int).
		$encoded = json_encode($valueToEncode, $forceObject ? JSON_FORCE_OBJECT : 0);

		if ($this->_config['noerror']) {
			error_reporting($old_errlevel);
		}
	}
	else {
		// fall back to php-only method
		self::$forceObject = $forceObject ? true : null;
		$encoded = $this->_json_encode($valueToEncode);
	}

	// sometimes you just don't want some values quoted
	if (!empty($deQuote)) {
		$encoded = $this->_deQuote($encoded, $deQuote);
	}

	return $encoded;
}

/**
 * Accepts a JSON-encoded string, and removes quotes around values of
 * keys specified in the $keys array.
 *
 * Sometimes, such as when constructing behaviors on the fly for "onSuccess"
 * handlers to an Ajax request, the value needs to **not** have quotes around
 * it. The matched value is handed to _stripvalueslashes(), which drops the
 * surrounding quotes and unescapes the rest.
 *
 * @param string $encoded  JSON-encoded string.
 * @param array  $keys     Array of keys whose values should be de-quoted.
 *
 * @return string  Cleaned string.
 */
protected function _deQuote($encoded, $keys) {
	foreach ($keys as $key) {
		// The key is matched literally, so regex metacharacters in it must be escaped.
		$encoded = preg_replace_callback("/(\"".preg_quote($key, '/')."\"\:)(\".*(?:[^\\\]\"))/U",
			[$this, '_stripvalueslashes'], $encoded);
	}

	return $encoded;
}
/**
 * Method for use with preg_replace_callback in the _deQuote() method.
 *
 * Returns the key part of the match followed by the value part, where the value has had its leading
 * and trailing double-quotes removed, and stripslashes() run on the rest.
 *
 * @param array $matches  Regexp matches: [1] is the '"key":' part, [2] the quoted value.
 *
 * @return string  Replacement string.
 */
protected function _stripvalueslashes($matches) {
	return $matches[1].stripslashes(substr($matches[2], 1, -1));
}

/**
 * Decodes the $encodedValue string which is encoded in the JSON format.
 *
 * The native json_decode() is used when available (and not bypassed); its error code is stored for
 * hasError(). Otherwise the PHP-only decoder runs: the text must start with '{' or '[' and pass
 * isValid(), and a null result is recorded as JSON_ERROR_SYNTAX.
 *
 * Note: the PHP-only path raises the 'pcre.backtrack_limit' ini setting and does not restore it.
 *
 * @param string $encodedValue  String encoded in JSON format.
 * @param bool   $asArray       Optional argument to decode objects as associative arrays.
 *                              Default false.
 *
 * @return mixed  Decoded value.
 */
public function decode($encodedValue, $asArray = false) {
	if (!$this->_config['bypass_ext'] && function_exists('json_decode') && function_exists('json_last_error')) {
		$result = json_decode($encodedValue, $asArray);
		$this->last_error = json_last_error();

		return $result;
	}

	$first_char = substr(ltrim($encodedValue), 0, 1);

	if ($first_char != '{' && $first_char != '[') {
		// only arrays and objects are accepted as a top-level payload
		$result = null;
	}
	else {
		ini_set('pcre.backtrack_limit', '10000000');

		$this->_level = 0;

		$result = $this->isValid($encodedValue) ? $this->_json_decode($encodedValue, $asArray) : null;
	}

	$this->last_error = ($result === null) ? JSON_ERROR_SYNTAX : JSON_ERROR_NONE;

	return $result;
}

/**
 * Returns true if last $this->decode call was with error.
 *
 * @return bool
 */
public function hasError() {
	return ($this->last_error != JSON_ERROR_NONE);
}

/**
 * Encodes the mixed $var into the JSON format, without use of
 * native PHP json extension.
 *
 * @param mixed $var  Any number, boolean, string, array, or object
 *                    to be encoded. Strings are expected to be in ASCII or UTF-8 format.
 *
 * @return string  JSON string representation of input value.
 *
 * @throws InvalidArgumentException if the value has a type that cannot be encoded and the 'noerror'
 *                                  option is off.
 */
protected function _json_encode($var) {
	switch (gettype($var)) {
		case 'boolean':
			return $var ? 'true' : 'false';
		case 'NULL':
			return 'null';
		case 'integer':
			// numbers are emitted through a plain string cast, for compatibility with ext/json
			return (string) $var;
		case 'double':
		case 'float':
			return (string) $var;
		case 'string':
			// STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
			$ascii = '';
			$strlen_var = strlen($var);

			/*
			 * Iterate over every byte in the string, escaping with a slash or converting multi-byte
			 * sequences to \uXXXX where necessary. Bytes that match none of the cases below (other
			 * control characters, stray continuation bytes) are skipped.
			 */
			for ($c = 0; $c < $strlen_var; ++$c) {
				$ord_var_c = ord($var[$c]);
				switch (true) {
					case $ord_var_c == 0x08:
						$ascii .= '\b';
						break;
					case $ord_var_c == 0x09:
						$ascii .= '\t';
						break;
					case $ord_var_c == 0x0A:
						$ascii .= '\n';
						break;
					case $ord_var_c == 0x0C:
						$ascii .= '\f';
						break;
					case $ord_var_c == 0x0D:
						$ascii .= '\r';
						break;
					case $ord_var_c == 0x22:
					case $ord_var_c == 0x2F:
					case $ord_var_c == 0x5C:
						// double quote, slash, backslash
						$ascii .= '\\'.$var[$c];
						break;
					case ($ord_var_c >= 0x20 && $ord_var_c <= 0x7F):
						// characters U-00000000 - U-0000007F (same as ASCII)
						$ascii .= $var[$c];
						break;
					case (($ord_var_c & 0xE0) == 0xC0):
						// characters U-00000080 - U-000007FF, mask 110XXXXX
						// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
						$char = pack('C*', $ord_var_c, ord($var[$c + 1]));
						$c += 1;
						$utf16 = $this->_utf82utf16($char);
						$ascii .= sprintf('\u%04s', bin2hex($utf16));
						break;
					case (($ord_var_c & 0xF0) == 0xE0):
						// characters U-00000800 - U-0000FFFF, mask 1110XXXX
						$char = pack('C*', $ord_var_c, ord($var[$c + 1]), ord($var[$c + 2]));
						$c += 2;
						$utf16 = $this->_utf82utf16($char);
						$ascii .= sprintf('\u%04s', bin2hex($utf16));
						break;
					case (($ord_var_c & 0xF8) == 0xF0):
						// characters U-00010000 - U-001FFFFF, mask 11110XXX
						$char = pack('C*', $ord_var_c, ord($var[$c + 1]), ord($var[$c + 2]), ord($var[$c + 3]));
						$c += 3;
						$utf16 = $this->_utf82utf16($char);
						$ascii .= sprintf('\u%04s', bin2hex($utf16));
						break;
					case (($ord_var_c & 0xFC) == 0xF8):
						// characters U-00200000 - U-03FFFFFF, mask 111110XX
						$char = pack('C*', $ord_var_c,
							ord($var[$c + 1]),
							ord($var[$c + 2]),
							ord($var[$c + 3]),
							ord($var[$c + 4]));
						$c += 4;
						$utf16 = $this->_utf82utf16($char);
						$ascii .= sprintf('\u%04s', bin2hex($utf16));
						break;
					case (($ord_var_c & 0xFE) == 0xFC):
						// characters U-04000000 - U-7FFFFFFF, mask 1111110X
						$char = pack('C*', $ord_var_c,
							ord($var[$c + 1]),
							ord($var[$c + 2]),
							ord($var[$c + 3]),
							ord($var[$c + 4]),
							ord($var[$c + 5]));
						$c += 5;
						$utf16 = $this->_utf82utf16($char);
						$ascii .= sprintf('\u%04s', bin2hex($utf16));
						break;
				}
			}

			return '"'.$ascii.'"';
		case 'array':
			/*
			 * As per JSON spec if any array key is not an integer
			 * we must treat the whole array as an object. We
			 * also try to catch a sparsely populated associative
			 * array with numeric keys here because some JS engines
			 * will create an array with empty indexes up to
			 * max_index which can cause memory issues and because
			 * the keys, which may be relevant, will be remapped
			 * otherwise.
			 */

			// treat as a JSON object
			if (self::$forceObject || is_array($var) && count($var) && array_keys($var) !== range(0, sizeof($var) - 1)) {
				$properties = array_map([$this, '_name_value'], array_keys($var), array_values($var));

				return '{' . join(',', $properties) . '}';
			}

			// treat it like a regular array
			$elements = array_map([$this, '_json_encode'], $var);

			return '[' . join(',', $elements) . ']';
		case 'object':
			$vars = get_object_vars($var);
			$properties = array_map([$this, '_name_value'], array_keys($vars), array_values($vars));

			return '{' . join(',', $properties) . '}';
		default:
			if ($this->_config['noerror']) {
				return 'null';
			}

			// The Solar framework exception factory used upstream is not part of this file.
			throw new InvalidArgumentException(gettype($var).' cannot be encoded as a JSON string');
	}
}

/**
 * Decodes a JSON string into appropriate variable (PHP-only decoder).
 *
 * A JSON payload is expected to be an object or an array. Scalars are therefore only returned while
 * decoding happens inside an array or object ($_level is IN_ARR or IN_OBJ); a bare 'true', '1' or
 * quoted string at the top level decodes to null.
 *
 * The method recurses for every array element and every object key/value. Input that matches none
 * of the recognised forms yields null.
 *
 * @param string $str      String encoded in JSON format.
 * @param bool   $asArray  Optional argument to decode objects as associative arrays.
 *
 * @return mixed  Decoded value.
 */
protected function _json_decode($str, $asArray = false) {
	$str = $this->_reduce_string($str);

	switch (strtolower($str)) {
		case 'true':
			// booleans are only valid when nested inside an array or object
			if (in_array($this->_level, [self::IN_ARR, self::IN_OBJ])) {
				return true;
			}
			else {
				return null;
			}
		case 'false':
			if (in_array($this->_level, [self::IN_ARR, self::IN_OBJ])) {
				return false;
			}
			else {
				return null;
			}
		case 'null':
			return null;
		default:
			$m = [];

			if (is_numeric($str) || ctype_digit($str) || ctype_xdigit($str)) {
				// return float or int, or null as appropriate
				if (in_array($this->_level, [self::IN_ARR, self::IN_OBJ])) {
					return ((float) $str == (int) $str) ? (int) $str : (float) $str;
				}
				else {
					return null;
				}
			}
			elseif (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
				// strings returned in UTF-8 format
				$delim = substr($str, 0, 1);
				$chrs = substr($str, 1, -1);
				$utf8 = '';
				$strlen_chrs = strlen($chrs);
				for ($c = 0; $c < $strlen_chrs; ++$c) {
					$substr_chrs_c_2 = substr($chrs, $c, 2);
					$ord_chrs_c = ord($chrs[$c]);
					switch (true) {
						case $substr_chrs_c_2 == '\b':
							$utf8 .= chr(0x08);
							++$c;
							break;
						case $substr_chrs_c_2 == '\t':
							$utf8 .= chr(0x09);
							++$c;
							break;
						case $substr_chrs_c_2 == '\n':
							$utf8 .= chr(0x0A);
							++$c;
							break;
						case $substr_chrs_c_2 == '\f':
							$utf8 .= chr(0x0C);
							++$c;
							break;
						case $substr_chrs_c_2 == '\r':
							$utf8 .= chr(0x0D);
							++$c;
							break;
						case $substr_chrs_c_2 == '\\"':
						case $substr_chrs_c_2 == '\\\'':
						case $substr_chrs_c_2 == '\\\\':
						case $substr_chrs_c_2 == '\\/':
							// unescape, except for the quote character that is not the delimiter
							if ($delim == '"' && $substr_chrs_c_2 != '\\\'' || $delim == "'"
									&& $substr_chrs_c_2 != '\\"') {
								$utf8 .= $chrs[++$c];
							}
							break;
						case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
							// single, escaped unicode character
							$utf16 = chr(hexdec(substr($chrs, $c + 2, 2))).chr(hexdec(substr($chrs, $c + 4, 2)));
							$utf8 .= $this->_utf162utf8($utf16);
							$c += 5;
							break;
						case $ord_chrs_c >= 0x20 && $ord_chrs_c <= 0x7F:
							$utf8 .= $chrs[$c];
							break;
						case ($ord_chrs_c & 0xE0) == 0xC0:
							// characters U-00000080 - U-000007FF, mask 110XXXXX
							// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
							$utf8 .= substr($chrs, $c, 2);
							++$c;
							break;
						case ($ord_chrs_c & 0xF0) == 0xE0:
							// characters U-00000800 - U-0000FFFF, mask 1110XXXX
							$utf8 .= substr($chrs, $c, 3);
							$c += 2;
							break;
						case ($ord_chrs_c & 0xF8) == 0xF0:
							// characters U-00010000 - U-001FFFFF, mask 11110XXX
							$utf8 .= substr($chrs, $c, 4);
							$c += 3;
							break;
						case ($ord_chrs_c & 0xFC) == 0xF8:
							// characters U-00200000 - U-03FFFFFF, mask 111110XX
							$utf8 .= substr($chrs, $c, 5);
							$c += 4;
							break;
						case ($ord_chrs_c & 0xFE) == 0xFC:
							// characters U-04000000 - U-7FFFFFFF, mask 1111110X
							$utf8 .= substr($chrs, $c, 6);
							$c += 5;
							break;
					}
				}

				if (in_array($this->_level, [self::IN_ARR, self::IN_OBJ])) {
					return $utf8;
				}
				else {
					return null;
				}
			}
			elseif (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
				// array, or object notation
				if ($str[0] == '[') {
					$stk = [self::IN_ARR];
					$this->_level = self::IN_ARR;
					$arr = [];
				}
				else {
					if ($asArray) {
						$stk = [self::IN_OBJ];
						$obj = [];
					}
					else {
						$stk = [self::IN_OBJ];
						$obj = new stdClass();
					}
					$this->_level = self::IN_OBJ;
				}
				array_push($stk, ['what' => self::SLICE, 'where' => 0, 'delim' => false]);

				$chrs = substr($str, 1, -1);
				$chrs = $this->_reduce_string($chrs);

				if ($chrs == '') {
					if (reset($stk) == self::IN_ARR) {
						return $arr;
					}
					else {
						return $obj;
					}
				}

				$strlen_chrs = strlen($chrs);

				// runs one step past the last character so that the final slice is flushed as well
				for ($c = 0; $c <= $strlen_chrs; ++$c) {
					$top = end($stk);
					$substr_chrs_c_2 = substr($chrs, $c, 2);

					if ($c == $strlen_chrs || ($chrs[$c] == ',' && $top['what'] == self::SLICE)) {
						// found a comma that is not inside a string, array, etc.,
						// OR we've reached the end of the character list
						$slice = substr($chrs, $top['where'], $c - $top['where']);
						array_push($stk, ['what' => self::SLICE, 'where' => $c + 1, 'delim' => false]);

						if (reset($stk) == self::IN_ARR) {
							$this->_level = self::IN_ARR;
							// we are in an array, so just push an element onto the stack
							array_push($arr, $this->_json_decode($slice, $asArray));
						}
						elseif (reset($stk) == self::IN_OBJ) {
							$this->_level = self::IN_OBJ;
							// we are in an object, so figure out the property name and set it
							$parts = [];

							if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
								// "name":value pair
								$key = $this->_json_decode($parts[1], $asArray);
								$val = $this->_json_decode($parts[2], $asArray);

								if ($asArray) {
									$obj[$key] = $val;
								}
								else {
									$obj->$key = $val;
								}
							}
							elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
								// name:value pair, where name is unquoted
								$key = $parts[1];
								$val = $this->_json_decode($parts[2], $asArray);

								if ($asArray) {
									$obj[$key] = $val;
								}
								else {
									$obj->$key = $val;
								}
							}
							elseif (preg_match('/^\s*(["\']["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
								// "":value pair; use the key name that matches ext/json
								$key = '_empty_';
								$val = $this->_json_decode($parts[2], $asArray);

								if ($asArray) {
									$obj[$key] = $val;
								}
								else {
									$obj->$key = $val;
								}
							}
						}
					}
					elseif (($chrs[$c] == '"' || $chrs[$c] == "'") && $top['what'] != self::IN_STR) {
						// found a quote, and we are not inside a string
						array_push($stk, ['what' => self::IN_STR, 'where' => $c, 'delim' => $chrs[$c]]);
					}
					elseif (((strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2 != 1)
							&& $chrs[$c] == $top['delim'] && $top['what'] == self::IN_STR) {
						// found a quote, we're in a string, and it's not escaped
						// we know that it's not escaped because there is _not_ an
						// odd number of backslashes at the end of the string so far
						array_pop($stk);
					}
					elseif ($chrs[$c] == '['
							&& in_array($top['what'], [self::SLICE, self::IN_ARR, self::IN_OBJ])) {
						// found a left-bracket, and we are in an array, object, or slice
						array_push($stk, ['what' => self::IN_ARR, 'where' => $c, 'delim' => false]);
					}
					elseif ($chrs[$c] == ']' && $top['what'] == self::IN_ARR) {
						// found a right-bracket, and we're in an array
						$this->_level = null;
						array_pop($stk);
					}
					elseif ($chrs[$c] == '{'
							&& in_array($top['what'], [self::SLICE, self::IN_ARR, self::IN_OBJ])) {
						// found a left-brace, and we are in an array, object, or slice
						array_push($stk, ['what' => self::IN_OBJ, 'where' => $c, 'delim' => false]);
					}
					elseif ($chrs[$c] == '}' && $top['what'] == self::IN_OBJ) {
						// found a right-brace, and we're in an object
						$this->_level = null;
						array_pop($stk);
					}
					elseif ($substr_chrs_c_2 == '/*'
							&& in_array($top['what'], [self::SLICE, self::IN_ARR, self::IN_OBJ])) {
						// found a comment start, and we are in an array, object, or slice
						array_push($stk, ['what' => self::IN_CMT, 'where' => $c, 'delim' => false]);
						$c++;
					}
					elseif ($substr_chrs_c_2 == '*/' && ($top['what'] == self::IN_CMT)) {
						// found a comment end, and we're in one now: blank the comment out
						array_pop($stk);
						$c++;
						for ($i = $top['where']; $i <= $c; ++$i) {
							$chrs = substr_replace($chrs, ' ', $i, 1);
						}
					}
				}

				if (reset($stk) == self::IN_ARR) {
					return $arr;
				}
				elseif (reset($stk) == self::IN_OBJ) {
					return $obj;
				}
			}
	}
}

/**
 * Array-walking method for use in generating JSON-formatted name-value
 * pairs in the form of '"name":value'.
 *
 * @param string $name   Name of key to use (cast to string before encoding).
 * @param mixed  $value  Element to be encoded.
 *
 * @return string  JSON-formatted name-value pair.
 */
protected function _name_value($name, $value) {
	$encoded_value = $this->_json_encode($value);

	return $this->_json_encode(strval($name)) . ':' . $encoded_value;
}
/**
 * Convert a string from one UTF-16 char to one UTF-8 char.
 *
 * Uses mb_convert_encoding() when it is available and not bypassed via the 'bypass_mb' option;
 * otherwise a PHP-only conversion handles a single 16-bit code unit (high byte first).
 *
 * @param string $utf16  UTF-16 character (the PHP-only path reads exactly two bytes).
 *
 * @return string  UTF-8 character, or '' when the PHP-only path cannot convert the value.
 */
protected function _utf162utf8($utf16) {
	if (!$this->_config['bypass_mb'] && function_exists('mb_convert_encoding')) {
		return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
	}

	$bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

	switch (true) {
		case ((0x7F & $bytes) == $bytes):
			// ASCII range, one byte
			// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
			return chr(0x7F & $bytes);
		case (0x07FF & $bytes) == $bytes:
			// return a 2-byte UTF-8 character
			return chr(0xC0 | (($bytes >> 6) & 0x1F)).chr(0x80 | ($bytes & 0x3F));
		case (0xFFFF & $bytes) == $bytes:
			// return a 3-byte UTF-8 character
			return chr(0xE0 | (($bytes >> 12) & 0x0F)).chr(0x80 | (($bytes >> 6) & 0x3F)).chr(0x80 | ($bytes & 0x3F));
	}

	// values outside the 16-bit range are not handled
	return '';
}
/**
 * Convert a string from one UTF-8 char to one UTF-16 char.
 *
 * Uses mb_convert_encoding() when it is available and not bypassed via the 'bypass_mb' option;
 * otherwise a PHP-only conversion handles sequences of one to three bytes.
 *
 * @param string $utf8  UTF-8 character.
 *
 * @return string  UTF-16 character, or '' when the PHP-only path gets a sequence of another length.
 */
protected function _utf82utf16($utf8) {
	if (!$this->_config['bypass_mb'] && function_exists('mb_convert_encoding')) {
		return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
	}

	switch (strlen($utf8)) {
		case 1:
			// ASCII range, returned unchanged
			// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
			return $utf8;
		case 2:
			// return a UTF-16 character from a 2-byte UTF-8 char
			return chr(0x07 & (ord($utf8[0]) >> 2)).chr((0xC0 & (ord($utf8[0]) << 6)) | (0x3F & ord($utf8[1])));
		case 3:
			// return a UTF-16 character from a 3-byte UTF-8 char
			return chr((0xF0 & (ord($utf8[0]) << 4)) | (0x0F & (ord($utf8[1]) >> 2))).
				chr((0xC0 & (ord($utf8[1]) << 6)) | (0x7F & ord($utf8[2])));
	}

	// longer sequences are not handled
	return '';
}

/**
 * Reduce a string by removing leading and trailing comments and whitespace.
 *
 * Removes lines that consist of a '//' comment, a block comment at the very start of the string and
 * a block comment at the very end of it, then trims the result.
 *
 * @param string $str  String value to strip of comments and whitespace.
 *
 * @return string  String value stripped of comments and whitespace.
 */
protected function _reduce_string($str) {
	$str = preg_replace([
		// eliminate single line comments in '// ...' form
		'#^\s*//(.+)$#m',

		// eliminate multi-line comments in '/* ... */' form, at start of string
		'#^\s*/\*(.+)\*/#Us',

		// eliminate multi-line comments in '/* ... */' form, at end of string
		'#/\*(.+)\*/\s*$#Us'

	], '', $str);

	// eliminate extraneous space
	return trim($str);
}

//***************************************************************************
//								CHECK JSON									*
//***************************************************************************
// Character classes: column indexes of the state transition table.
const S_ERR = -1;	// Error
const S_SPA = 0;	// Space
const S_WSP = 1;	// Other whitespace
const S_LBE = 2;	// {
const S_RBE = 3;	// }
const S_LBT = 4;	// [
const S_RBT = 5;	// ]
const S_COL = 6;	// :
const S_COM = 7;	// ,
const S_QUO = 8;	// "
const S_BAC = 9;	// \
const S_SLA = 10;	// /
const S_PLU = 11;	// +
const S_MIN = 12;	// -
const S_DOT = 13;	// .
const S_ZER = 14;	// 0
const S_DIG = 15;	// 123456789
const S__A_ = 16;	// a
const S__B_ = 17;	// b
const S__C_ = 18;	// c
const S__D_ = 19;	// d
const S__E_ = 20;	// e
const S__F_ = 21;	// f
const S__L_ = 22;	// l
const S__N_ = 23;	// n
const S__R_ = 24;	// r
const S__S_ = 25;	// s
const S__T_ = 26;	// t
const S__U_ = 27;	// u
const S_A_F = 28;	// ABCDF
const S_E = 29;		// E
const S_ETC = 30;	// Everything else

/**
 * Map of the 128 ASCII characters into the S_* character classes, filled by _mapAscii().
 * Bytes outside the ASCII range are treated as S_ETC by isValid().
 *
 * @var array
 */
protected $_ascii_class = [];

/**
 * State transition table, filled by _setStateTransitionTable().
 *
 * @var array
 */
protected $_state_transition_table = [];

/**
 * These modes can be pushed on the "pushdown automata" (PDA) stack.
 */
const MODE_DONE = 1;
const MODE_KEY = 2;
const MODE_OBJECT = 3;
const MODE_ARRAY = 4;

/**
 * Max depth allowed for nested structures.
 */
const MAX_DEPTH = 20;
/**
 * The stack to maintain the state of nested structures.
 *
 * @var array
 */
protected $_the_stack = [];

/**
 * Pointer for the top of the stack.
 *
 * @var int
 */
protected $_the_top;

/**
 * Determines if the given string is a syntactically correct JSON text.
 *
 * It is implemented as a Pushdown Automaton; that means it is a finite
 * state machine with a stack. The string is processed byte by byte: ASCII bytes are classified
 * through $_ascii_class, every other byte counts as S_ETC. The text is accepted when it ends in
 * state 9 with MODE_DONE on top of the stack.
 *
 * @param string $str  The JSON text to validate.
 *
 * @return bool
 */
public function isValid($str) {
	$len = strlen($str);
	$_the_state = 0;
	$this->_the_top = -1;
	$this->_push(self::MODE_DONE);

	for ($_the_index = 0; $_the_index < $len; $_the_index++) {
		$b = $str[$_the_index];

		if (chr(ord($b) & 127) == $b) {
			// 7-bit byte: look its class up, control characters without a class are rejected
			$c = $this->_ascii_class[ord($b)];
			if ($c <= self::S_ERR) {
				return false;
			}
		}
		else {
			$c = self::S_ETC;
		}

		// get the next state from the transition table
		$s = $this->_state_transition_table[$_the_state][$c];

		if ($s < 0) {
			// perform one of the predefined actions
			switch ($s) {
				// empty }
				case -9:
					if (!$this->_pop(self::MODE_KEY)) {
						return false;
					}
					$_the_state = 9;
					break;
				// {
				case -8:
					if (!$this->_push(self::MODE_KEY)) {
						return false;
					}
					$_the_state = 1;
					break;
				// }
				case -7:
					if (!$this->_pop(self::MODE_OBJECT)) {
						return false;
					}
					$_the_state = 9;
					break;
				// [
				case -6:
					if (!$this->_push(self::MODE_ARRAY)) {
						return false;
					}
					$_the_state = 2;
					break;
				// ]
				case -5:
					if (!$this->_pop(self::MODE_ARRAY)) {
						return false;
					}
					$_the_state = 9;
					break;
				// "
				case -4:
					switch ($this->_the_stack[$this->_the_top]) {
						case self::MODE_KEY:
							$_the_state = 27;
							break;
						case self::MODE_ARRAY:
						case self::MODE_OBJECT:
							$_the_state = 9;
							break;
						default:
							return false;
					}
					break;
				// , (the visible part of the transition table yields -3 for a comma in state 9)
				case -3:
					switch ($this->_the_stack[$this->_the_top]) {
						case self::MODE_OBJECT:
							// a failed pop/push leaves the state as it is
							if ($this->_pop(self::MODE_OBJECT) && $this->_push(self::MODE_KEY)) {
								$_the_state = 29;
							}
							break;
						case self::MODE_ARRAY:
							$_the_state = 28;
							break;
						default:
							return false;
					}
					break;
				// :
				case -2:
					if ($this->_pop(self::MODE_KEY) && $this->_push(self::MODE_OBJECT)) {
						$_the_state = 28;
						break;
					}
					// intentional fall-through: a failed mode switch is a syntax error
				// syntax error
				case -1:
					return false;
			}
		}
		else {
			// change the state and iterate
			$_the_state = $s;
		}
	}

	if ($_the_state == 9 && $this->_pop(self::MODE_DONE)) {
		return true;
	}

	return false;
}

/**
 * Map the 128 ASCII characters into the S_* character classes.
 * Each row below covers eight consecutive character codes, starting at 0.
 *
 * @return void
 */
protected function _mapAscii() {
	$this->_ascii_class = [
		self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR,
		self::S_ERR, self::S_WSP, self::S_WSP, self::S_ERR, self::S_ERR, self::S_WSP, self::S_ERR, self::S_ERR,
		self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR,
		self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR, self::S_ERR,

		self::S_SPA, self::S_ETC, self::S_QUO, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC,
		self::S_ETC, self::S_ETC, self::S_ETC, self::S_PLU, self::S_COM, self::S_MIN, self::S_DOT, self::S_SLA,
		self::S_ZER, self::S_DIG, self::S_DIG, self::S_DIG, self::S_DIG, self::S_DIG, self::S_DIG, self::S_DIG,
		self::S_DIG, self::S_DIG, self::S_COL, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC,

		self::S_ETC, self::S_A_F, self::S_A_F, self::S_A_F, self::S_A_F, self::S_E  , self::S_A_F, self::S_ETC,
		self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC,
		self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC,
		self::S_ETC, self::S_ETC, self::S_ETC, self::S_LBT, self::S_BAC, self::S_RBT, self::S_ETC, self::S_ETC,

		self::S_ETC, self::S__A_, self::S__B_, self::S__C_, self::S__D_, self::S__E_, self::S__F_, self::S_ETC,
		self::S_ETC, self::S_ETC, self::S_ETC, self::S_ETC, self::S__L_, self::S_ETC, self::S__N_, self::S_ETC,
		self::S_ETC, self::S_ETC, self::S__R_, self::S__S_, self::S__T_, self::S__U_, self::S_ETC, self::S_ETC,
		self::S_ETC, self::S_ETC, self::S_ETC, self::S_LBE, self::S_ETC, self::S_RBE, self::S_ETC, self::S_ETC
	];
}
1073 * 1074 * @return void; 1075 */ 1076 protected function _setStateTransitionTable() { 1077 $this->_state_transition_table = [ 1078 [ 0, 0,-8,-1,-6,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1079 [ 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1080 [ 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1], 1081 [ 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], 1082 [-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1], 1083 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1], 1084 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1], 1085 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1], 1086 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1], 1087 [ 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1088 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1], 1089 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1], 1090 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1091 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1092 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1], 1093 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1], 1094 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1095 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1], 1096 
[-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1], 1097 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1], 1098 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1099 [ 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1100 [ 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1], 1101 [ 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1], 1102 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1103 [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1104 [ 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1105 [27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1], 1106 [28,28,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1], 1107 [29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1] 1108 ]; 1109 } 1110 1111 /** 1112 * Push a mode onto the stack. Return false if there is overflow. 1113 * 1114 * @param int $mode Mode to push onto the stack 1115 * @return bool Success/failure of stack push 1116 */ 1117 protected function _push($mode) { 1118 $this->_the_top++; 1119 if ($this->_the_top >= self::MAX_DEPTH) { 1120 return false; 1121 } 1122 $this->_the_stack[$this->_the_top] = $mode; 1123 return true; 1124 } 1125 1126 /** 1127 * Pop the stack, assuring that the current mode matches the expectation. 1128 * Return false if there is underflow or if the modes mismatch. 
1129 * 1130 * @param int $mode Mode to pop from the stack 1131 * @return bool Success/failure of stack pop 1132 */ 1133 protected function _pop($mode) { 1134 if ($this->_the_top < 0 || $this->_the_stack[$this->_the_top] != $mode) { 1135 return false; 1136 } 1137 $this->_the_stack[$this->_the_top] = 0; 1138 $this->_the_top--; 1139 return true; 1140 } 1141} 1142