<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 */

namespace Zend\Json;

use stdClass;
use Zend\Json\Exception\InvalidArgumentException;
use Zend\Json\Exception\RuntimeException;

/**
 * Decode JSON encoded string to PHP variable constructs
 *
 * The decoder is a small hand-written tokenizer ({@link _getNextToken()})
 * driving a recursive-descent parser ({@link _decodeValue()},
 * {@link _decodeObject()}, {@link _decodeArray()}). Instances are created
 * only through {@link decode()}; the constructor is protected.
 */
class Decoder
{
    /**
     * Parse tokens used to decode the JSON object. These are not
     * for public consumption, they are just used internally to the
     * class.
     */
    const EOF      = 0;
    const DATUM    = 1;
    const LBRACE   = 2;
    const LBRACKET = 3;
    const RBRACE   = 4;
    const RBRACKET = 5;
    const COMMA    = 6;
    const COLON    = 7;

    /**
     * The source being decoded, after \uXXXX sequences have been expanded
     * by {@link decodeUnicodeString()}
     *
     * @var string
     */
    protected $source;

    /**
     * Caches the source length (in bytes)
     *
     * @var int
     */
    protected $sourceLength;

    /**
     * The byte offset within the source at which the next token starts
     *
     * @var int
     */
    protected $offset;

    /**
     * The current token being considered in the parser cycle
     * (one of the token constants of this class)
     *
     * @var int
     */
    protected $token;

    /**
     * Flag indicating how objects should be decoded
     *
     * @var int
     */
    protected $decodeType;

    /**
     * The PHP value carried by the current token when it is a DATUM
     * (string, int, float, bool or null); null for every other token
     *
     * @var mixed
     */
    protected $tokenValue;

    /**
     * Decode Unicode Characters from \u0000 ASCII syntax.
     *
     * Every \uXXXX sequence is replaced by the UTF-8 encoding of the
     * character it names. A high surrogate immediately followed by a low
     * surrogate (e.g. \uD83D\uDE00) is decoded as ONE character. When the
     * decoded character is one the tokenizer treats specially inside a string
     * (backslash, quotes, slash, \n, \t, \r, \b, \f) it is written back in
     * its backslash-escaped form so that it cannot change the structure of
     * the surrounding JSON text.
     *
     * All other input is copied through as follows: printable ASCII
     * (0x20-0x7F) byte by byte, multi-byte UTF-8 sequences as a unit based
     * on their lead byte. Any other byte (control characters below 0x20,
     * stray continuation bytes) is dropped.
     *
     * This algorithm was originally developed for the
     * Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com/
     * @link   https://github.com/solarphp/core/blob/master/Solar/Json.php
     * @param  string $chrs
     * @return string
     */
    public static function decodeUnicodeString($chrs)
    {
        $chrs       = (string) $chrs;
        $utf8       = '';
        $strlenChrs = strlen($chrs);

        // Matches a backslash, "u" and four hex digits. It is only ever
        // applied to 6-byte slices, so a match always covers the whole slice.
        $escapePattern = '/\\\u[0-9A-F]{4}/i';

        // Decoded characters that must be re-escaped for the tokenizer.
        $reEscape = array(
            '\\'      => '\\\\',
            "\n"      => '\\n',
            "\t"      => '\\t',
            "\r"      => '\\r',
            chr(0x08) => '\\b',
            chr(0x0C) => '\\f',
            '"'       => '\\"',
            '\''      => '\\\'',
            '/'       => '\\/',
        );

        // array(mask, expected lead bits, sequence length in bytes)
        // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
        $leadBytes = array(
            array(0xE0, 0xC0, 2), // U-00000080 - U-000007FF, mask 110XXXXX
            array(0xF0, 0xE0, 3), // U-00000800 - U-0000FFFF, mask 1110XXXX
            array(0xF8, 0xF0, 4), // U-00010000 - U-001FFFFF, mask 11110XXX
            array(0xFC, 0xF8, 5), // U-00200000 - U-03FFFFFF, mask 111110XX
            array(0xFE, 0xFC, 6), // U-04000000 - U-7FFFFFFF, mask 1111110X
        );

        for ($i = 0; $i < $strlenChrs; $i++) {
            if (preg_match($escapePattern, substr($chrs, $i, 6))) {
                // Escaped UTF-16 code unit, stored big-endian.
                $codeUnit = hexdec(substr($chrs, $i + 2, 4));
                $utf16    = chr($codeUnit >> 8) . chr($codeUnit & 0xFF);
                $consumed = 6;

                // A high surrogate followed by an escaped low surrogate forms
                // a single code point above U+FFFF; convert both together.
                if ($codeUnit >= 0xD800 && $codeUnit <= 0xDBFF
                    && preg_match($escapePattern, substr($chrs, $i + 6, 6))
                ) {
                    $lowUnit = hexdec(substr($chrs, $i + 8, 4));
                    if ($lowUnit >= 0xDC00 && $lowUnit <= 0xDFFF) {
                        $utf16   .= chr($lowUnit >> 8) . chr($lowUnit & 0xFF);
                        $consumed = 12;
                    }
                }

                $utf8char = self::_utf162utf8($utf16);
                if (isset($reEscape[$utf8char])) {
                    $utf8char = $reEscape[$utf8char];
                }

                $utf8 .= $utf8char;
                $i    += $consumed - 1; // the loop header adds the final 1
                continue;
            }

            $ordChrsC = ord($chrs[$i]);

            if ($ordChrsC >= 0x20 && $ordChrsC <= 0x7F) {
                $utf8 .= $chrs[$i];
                continue;
            }

            // Copy a multi-byte UTF-8 sequence as one unit. A byte matching
            // none of the lead patterns is silently skipped.
            foreach ($leadBytes as $lead) {
                if (($ordChrsC & $lead[0]) === $lead[1]) {
                    $utf8 .= substr($chrs, $i, $lead[2]);
                    $i    += $lead[2] - 1;
                    break;
                }
            }
        }

        return $utf8;
    }

    /**
     * Constructor
     *
     * Expands \uXXXX sequences in the source, validates the decode type and
     * positions the tokenizer on the first token.
     *
     * @param  string $source     String source to decode
     * @param  int    $decodeType How objects should be decoded -- see
     *     {@link Zend\Json\Json::TYPE_ARRAY} and {@link Zend\Json\Json::TYPE_OBJECT} for
     *     valid values
     * @throws InvalidArgumentException if $decodeType is not one of the two constants
     * @throws RuntimeException if the first token of the source is illegal
     */
    protected function __construct($source, $decodeType)
    {
        // Set defaults
        $this->source       = self::decodeUnicodeString($source);
        $this->sourceLength = strlen($this->source);
        $this->token        = self::EOF;
        $this->offset       = 0;

        switch ($decodeType) {
            case Json::TYPE_ARRAY:
            case Json::TYPE_OBJECT:
                $this->decodeType = $decodeType;
                break;
            default:
                throw new InvalidArgumentException("Unknown decode type '{$decodeType}', please use one of the constants Json::TYPE_*");
        }

        // Set pointer at first token
        $this->_getNextToken();
    }

    /**
     * Decode a JSON source string
     *
     * Decodes a JSON encoded string. The value returned will be one of the
     * following:
     *        - integer
     *        - float
     *        - boolean
     *        - null
     *        - string
     *        - stdClass
     *        - array
     *        - array of one or more of the above types
     *
     * By default ({@link Zend\Json\Json::TYPE_OBJECT}) decoded JSON objects
     * are returned as stdClass instances; to get associative arrays instead,
     * pass {@link Zend\Json\Json::TYPE_ARRAY} as $objectDecodeType.
     *
     * @param  string $source           String to be decoded
     * @param  int    $objectDecodeType How objects should be decoded; should be
     *     either {@link Zend\Json\Json::TYPE_ARRAY} or
     *     {@link Zend\Json\Json::TYPE_OBJECT}; defaults to TYPE_OBJECT
     * @return mixed
     * @throws InvalidArgumentException on an unknown $objectDecodeType
     * @throws RuntimeException on malformed input detected by the parser
     */
    public static function decode($source, $objectDecodeType = Json::TYPE_OBJECT)
    {
        $decoder = new static($source, $objectDecodeType);
        return $decoder->_decodeValue();
    }

    /**
     * Recursive driving routine: decodes the value starting at the current token
     *
     * A DATUM yields its scalar value and advances to the next token; "{" and
     * "[" are delegated to the object / array decoders. Any other token
     * yields null WITHOUT advancing.
     *
     * @return mixed
     */
    protected function _decodeValue()
    {
        switch ($this->token) {
            case self::DATUM:
                $result = $this->tokenValue;
                $this->_getNextToken();
                return $result;
            case self::LBRACE:
                return $this->_decodeObject();
            case self::LBRACKET:
                return $this->_decodeArray();
            default:
                return null;
        }
    }

    /**
     * Decodes an object of the form:
     *  { "attribute" : value, "attribute2" : value,...}
     *
     * Every key is stored as a plain member; a key such as __className is
     * not treated specially and no class wrapping is performed here.
     *
     * Decodes to either an array or stdClass object, based on the value of
     * {@link $decodeType}. If invalid $decodeType present, returns as an
     * array. In stdClass mode an empty key ("") is stored under the property
     * name "_empty_".
     *
     * On entry the current token is "{"; on return the tokenizer has moved
     * past the closing "}".
     *
     * @return array|stdClass
     * @throws RuntimeException on a non-string key, a missing ":" or a missing ","
     */
    protected function _decodeObject()
    {
        $members = array();
        $tok     = $this->_getNextToken(); // Move past the '{'

        while ($tok !== self::EOF && $tok !== self::RBRACE) {
            if ($tok !== self::DATUM || ! is_string($this->tokenValue)) {
                throw new RuntimeException('Missing key in object encoding: ' . $this->source);
            }

            $key = $this->tokenValue;

            if ($this->_getNextToken() !== self::COLON) {
                throw new RuntimeException('Missing ":" in object encoding: ' . $this->source);
            }

            $this->_getNextToken();
            $members[$key] = $this->_decodeValue();

            // _decodeValue() leaves the token that follows the value current.
            $tok = $this->token;

            if ($tok === self::RBRACE) {
                break;
            }

            if ($tok !== self::COMMA) {
                throw new RuntimeException('Missing "," in object encoding: ' . $this->source);
            }

            $tok = $this->_getNextToken();
        }

        if ($this->decodeType == Json::TYPE_OBJECT) {
            // Create new stdClass and populate with $members
            $result = new stdClass();
            foreach ($members as $key => $value) {
                if ($key === '') {
                    $key = '_empty_';
                }
                $result->$key = $value;
            }
        } else {
            // Json::TYPE_ARRAY, and the fallback for anything unexpected
            $result = $members;
        }

        $this->_getNextToken(); // Move past the '}'
        return $result;
    }

    /**
     * Decodes a JSON array format:
     *    [element, element2,...,elementN]
     *
     * On entry the current token is "["; on return the tokenizer has moved
     * past the closing "]". An array cut short by the end of the input is
     * returned with the elements read so far.
     *
     * @return array
     * @throws RuntimeException when two elements are not separated by ","
     */
    protected function _decodeArray()
    {
        $result = array();
        $tok    = $this->_getNextToken(); // Move past the '['

        while ($tok !== self::EOF && $tok !== self::RBRACKET) {
            $result[] = $this->_decodeValue();

            // _decodeValue() leaves the token that follows the value current.
            $tok = $this->token;

            if ($tok === self::RBRACKET || $tok === self::EOF) {
                break;
            }

            if ($tok !== self::COMMA) {
                throw new RuntimeException('Missing "," in array encoding: ' . $this->source);
            }

            $tok = $this->_getNextToken();
        }

        $this->_getNextToken(); // Move past the ']'
        return $result;
    }

    /**
     * Advances {@link $offset} past any run of whitespace characters
     * (tab, backspace, form feed, newline, carriage return, space)
     * that starts at the current offset
     */
    protected function _eatWhitespace()
    {
        if (preg_match('/([\t\b\f\n\r ])*/s', $this->source, $matches, PREG_OFFSET_CAPTURE, $this->offset)
            && $matches[0][1] == $this->offset
        ) {
            $this->offset += strlen($matches[0][0]);
        }
    }

    /**
     * Retrieves the next token from the source stream
     *
     * Skips whitespace, then recognises in turn: the structural characters
     * { } [ ] , : -- double-quoted strings -- the literals true, false and
     * null -- numbers. {@link $token}, {@link $tokenValue} and
     * {@link $offset} are updated accordingly.
     *
     * A string that reaches the end of the input without a closing quote is
     * accepted with the characters read so far.
     *
     * @return int Token constant value specified in class definition
     * @throws RuntimeException on an illegal escape sequence, a malformed
     *     literal or number, or an unexpected character
     */
    protected function _getNextToken()
    {
        $this->token      = self::EOF;
        $this->tokenValue = null;
        $this->_eatWhitespace();

        if ($this->offset >= $this->sourceLength) {
            return self::EOF;
        }

        $str       = $this->source;
        $strLength = $this->sourceLength;
        $i         = $this->offset;
        $start     = $i;
        $chr       = $str[$i];

        // Structural single-character tokens
        $punctuation = array(
            '{' => self::LBRACE,
            '}' => self::RBRACE,
            '[' => self::LBRACKET,
            ']' => self::RBRACKET,
            ',' => self::COMMA,
            ':' => self::COLON,
        );

        if (isset($punctuation[$chr])) {
            $this->token  = $punctuation[$chr];
            $this->offset = $i + 1;
            return $this->token;
        }

        // String
        if ($chr === '"') {
            // Character following a backslash => character it stands for
            $escapes = array(
                '"'  => '"',
                '\\' => '\\',
                '/'  => '/',
                'b'  => "\x08",
                'f'  => "\x0c",
                'n'  => "\x0a",
                'r'  => "\x0d",
                't'  => "\x09",
                '\'' => '\'',
            );

            $result = '';
            do {
                $i++;
                if ($i >= $strLength) {
                    break;
                }

                $chr = $str[$i];

                if ($chr === '\\') {
                    $i++;
                    if ($i >= $strLength) {
                        break;
                    }
                    $chr = $str[$i];
                    if (! isset($escapes[$chr])) {
                        throw new RuntimeException("Illegal escape sequence '{$chr}'");
                    }
                    $result .= $escapes[$chr];
                } elseif ($chr === '"') {
                    break;
                } else {
                    $result .= $chr;
                }
            } while ($i < $strLength);

            $this->token      = self::DATUM;
            $this->tokenValue = $result;
            $this->offset     = $i + 1; // Consume the closing quote
            return $this->token;
        }

        // Literals: first character => array(full spelling, PHP value)
        $literals = array(
            't' => array('true', true),
            'f' => array('false', false),
            'n' => array('null', null),
        );

        if (isset($literals[$chr])) {
            list($word, $value) = $literals[$chr];

            // Only consume input when the complete literal is present; a
            // partial match must not move the offset or set a value.
            if (substr($str, $start, strlen($word)) !== $word) {
                throw new RuntimeException('Illegal Token');
            }

            $this->token      = self::DATUM;
            $this->tokenValue = $value;
            $this->offset     = $start + strlen($word);
            return $this->token;
        }

        // Numbers
        if (strpos('-.0123456789', $chr) === false) {
            throw new RuntimeException('Illegal Token');
        }

        if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s', $str, $matches, PREG_OFFSET_CAPTURE, $start)
            && $matches[0][1] == $start
        ) {
            $datum = $matches[0][0];

            if (! is_numeric($datum)) {
                throw new RuntimeException("Illegal number format: {$datum}");
            }

            if (preg_match('/^0\d+$/', $datum)) {
                throw new RuntimeException("Octal notation not supported by JSON (value: {$datum})");
            }

            // Keep an int when the value survives the int round trip,
            // otherwise fall back to float.
            $val              = intval($datum);
            $fVal             = floatval($datum);
            $this->tokenValue = ($val == $fVal ? $val : $fVal);

            $this->token  = self::DATUM;
            $this->offset = $start + strlen($datum);
        }

        return $this->token;
    }

    /**
     * Convert a string from one UTF-16 char to one UTF-8 char.
     *
     * The input is big-endian UTF-16: either one 2-byte code unit, or a
     * 4-byte surrogate pair encoding a single code point above U+FFFF.
     *
     * Normally should be handled by mb_convert_encoding, but
     * provides a slower PHP-only method for installations
     * that lack the multibyte string extension.
     *
     * This method is from the Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com
     * @param  string $utf16 UTF-16 character
     * @return string UTF-8 character
     */
    protected static function _utf162utf8($utf16)
    {
        // Check for mb extension otherwise do by hand.
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
        }

        $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

        // Surrogate pair: rebuild the code point and emit 4 UTF-8 bytes.
        if (strlen($utf16) >= 4 && $bytes >= 0xD800 && $bytes <= 0xDBFF) {
            $low = (ord($utf16[2]) << 8) | ord($utf16[3]);
            if ($low >= 0xDC00 && $low <= 0xDFFF) {
                $codePoint = 0x10000 + (($bytes - 0xD800) << 10) + ($low - 0xDC00);

                return chr(0xF0 | ($codePoint >> 18))
                     . chr(0x80 | (($codePoint >> 12) & 0x3F))
                     . chr(0x80 | (($codePoint >> 6) & 0x3F))
                     . chr(0x80 | ($codePoint & 0x3F));
            }
        }

        // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
        if ($bytes <= 0x7F) {
            // ASCII range: a single byte
            return chr($bytes);
        }

        if ($bytes <= 0x07FF) {
            // return a 2-byte UTF-8 character
            return chr(0xC0 | (($bytes >> 6) & 0x1F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        // return a 3-byte UTF-8 character ($bytes never exceeds 0xFFFF)
        return chr(0xE0 | (($bytes >> 12) & 0x0F))
             . chr(0x80 | (($bytes >> 6) & 0x3F))
             . chr(0x80 | ($bytes & 0x3F));
    }
}