<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 */

namespace Zend\Serializer\Adapter;

use stdClass;
use Traversable;
use Zend\Math\BigInteger;
use Zend\Serializer\Exception;
use Zend\Stdlib\ArrayUtils;

/**
 * Serializer adapter that writes and reads Python's pickle format.
 *
 * Serializing maps PHP values onto pickle opcodes: null => None, bool, int,
 * float, string, list-like arrays => list, all other arrays and objects => dict.
 * Unserializing runs a small stack machine over the opcodes handled by load();
 * Python lists, tuples and dicts all become PHP arrays.
 *
 * @link http://www.python.org
 * @see  Python3.1/Lib/pickle.py
 * @see  Python3.1/Modules/_pickle.c
 * @link http://pickle-js.googlecode.com
 */
class PythonPickle extends AbstractAdapter
{
    /**
     * Pickle opcodes. See pickletools.py for extensive docs.
     * @link http://hg.python.org/cpython/file/2.7/Lib/pickletools.py
     * The listing here is in kind-of alphabetical order of 1-character pickle code.
     * pickletools groups them by purpose.
     */
    const OP_MARK            = '(';     // push special markobject on stack
    const OP_STOP            = '.';     // every pickle ends with STOP
    const OP_POP             = '0';     // discard topmost stack item
    const OP_POP_MARK        = '1';     // discard stack top through topmost markobject
    const OP_DUP             = '2';     // duplicate top stack item
    const OP_FLOAT           = 'F';     // push float object; decimal string argument
    const OP_INT             = 'I';     // push integer or bool; decimal string argument
    const OP_BININT          = 'J';     // push four-byte signed int
    const OP_BININT1         = 'K';     // push 1-byte unsigned int
    const OP_LONG            = 'L';     // push long; decimal string argument
    const OP_BININT2         = 'M';     // push 2-byte unsigned int
    const OP_NONE            = 'N';     // push None
    const OP_PERSID          = 'P';     // push persistent object; id is taken from string arg
    const OP_BINPERSID       = 'Q';     //  "       "         "  ;  "  "   "     "  stack
    const OP_REDUCE          = 'R';     // apply callable to argtuple, both on stack
    const OP_STRING          = 'S';     // push string; NL-terminated string argument
    const OP_BINSTRING       = 'T';     // push string; counted binary string argument
    const OP_SHORT_BINSTRING = 'U';     //  "     "   ;    "      "       "      " < 256 bytes
    const OP_UNICODE         = 'V';     // push Unicode string; raw-unicode-escaped'd argument
    const OP_BINUNICODE      = 'X';     //   "     "       "  ; counted UTF-8 string argument
    const OP_APPEND          = 'a';     // append stack top to list below it
    const OP_BUILD           = 'b';     // call __setstate__ or __dict__.update()
    const OP_GLOBAL          = 'c';     // push self.find_class(modname, name); 2 string args
    const OP_DICT            = 'd';     // build a dict from stack items
    const OP_EMPTY_DICT      = '}';     // push empty dict
    const OP_APPENDS         = 'e';     // extend list on stack by topmost stack slice
    const OP_GET             = 'g';     // push item from memo on stack; index is string arg
    const OP_BINGET          = 'h';     //   "    "    "    "   "   "  ;   "    " 1-byte arg
    const OP_INST            = 'i';     // build & push class instance
    const OP_LONG_BINGET     = 'j';     // push item from memo on stack; index is 4-byte arg
    const OP_LIST            = 'l';     // build list from topmost stack items
    const OP_EMPTY_LIST      = ']';     // push empty list
    const OP_OBJ             = 'o';     // build & push class instance
    const OP_PUT             = 'p';     // store stack top in memo; index is string arg
    const OP_BINPUT          = 'q';     //   "     "    "   "   " ;   "    " 1-byte arg
    const OP_LONG_BINPUT     = 'r';     //   "     "    "   "   " ;   "    " 4-byte arg
    const OP_SETITEM         = 's';     // add key+value pair to dict
    const OP_TUPLE           = 't';     // build tuple from topmost stack items
    const OP_EMPTY_TUPLE     = ')';     // push empty tuple
    const OP_SETITEMS        = 'u';     // modify dict by adding topmost key+value pairs
    const OP_BINFLOAT        = 'G';     // push float; arg is 8-byte float encoding

    /* Protocol 2 */
    const OP_PROTO           = "\x80";  // identify pickle protocol
    const OP_NEWOBJ          = "\x81";  // build object by applying cls.__new__ to argtuple
    const OP_EXT1            = "\x82";  // push object from extension registry; 1-byte index
    const OP_EXT2            = "\x83";  // ditto, but 2-byte index
    const OP_EXT4            = "\x84";  // ditto, but 4-byte index
    const OP_TUPLE1          = "\x85";  // build 1-tuple from stack top
    const OP_TUPLE2          = "\x86";  // build 2-tuple from two topmost stack items
    const OP_TUPLE3          = "\x87";  // build 3-tuple from three topmost stack items
    const OP_NEWTRUE         = "\x88";  // push True
    const OP_NEWFALSE        = "\x89";  // push False
    const OP_LONG1           = "\x8a";  // push long from < 256 bytes
    const OP_LONG4           = "\x8b";  // push really big long

    /* Protocol 3 (Python 3.x) */
    const OP_BINBYTES        = 'B';     // push bytes; counted binary string argument
    const OP_SHORT_BINBYTES  = 'C';     //  "     "   ;    "      "       "      " < 256 bytes

    /**
     * Whether or not the system is little-endian
     *
     * @var bool
     */
    protected static $isLittleEndian = null;

    /**
     * @var array Raw byte => escaped representation used when quoting protocol 0 strings
     */
    protected static $quoteString = array(
        '\\'   => '\\\\',
        "\x00" => '\\x00', "\x01" => '\\x01', "\x02" => '\\x02', "\x03" => '\\x03',
        "\x04" => '\\x04', "\x05" => '\\x05', "\x06" => '\\x06', "\x07" => '\\x07',
        "\x08" => '\\x08', "\x09" => '\\t',   "\x0a" => '\\n',   "\x0b" => '\\x0b',
        "\x0c" => '\\x0c', "\x0d" => '\\r',   "\x0e" => '\\x0e', "\x0f" => '\\x0f',
        "\x10" => '\\x10', "\x11" => '\\x11', "\x12" => '\\x12', "\x13" => '\\x13',
        "\x14" => '\\x14', "\x15" => '\\x15', "\x16" => '\\x16', "\x17" => '\\x17',
        "\x18" => '\\x18', "\x19" => '\\x19', "\x1a" => '\\x1a', "\x1b" => '\\x1b',
        "\x1c" => '\\x1c', "\x1d" => '\\x1d', "\x1e" => '\\x1e', "\x1f" => '\\x1f',
        "\xff" => '\\xff'
    );

    /**
     * @var array Opcode => name of the method that executes it while unserializing
     */
    protected static $opcodeLoaders = array(
        self::OP_PUT             => 'loadPut',
        self::OP_BINPUT          => 'loadBinPut',
        self::OP_LONG_BINPUT     => 'loadLongBinPut',
        self::OP_GET             => 'loadGet',
        self::OP_BINGET          => 'loadBinGet',
        self::OP_LONG_BINGET     => 'loadLongBinGet',
        self::OP_NONE            => 'loadNone',
        self::OP_NEWTRUE         => 'loadNewTrue',
        self::OP_NEWFALSE        => 'loadNewFalse',
        self::OP_INT             => 'loadInt',
        self::OP_BININT          => 'loadBinInt',
        self::OP_BININT1         => 'loadBinInt1',
        self::OP_BININT2         => 'loadBinInt2',
        self::OP_LONG            => 'loadLong',
        self::OP_LONG1           => 'loadLong1',
        self::OP_LONG4           => 'loadLong4',
        self::OP_FLOAT           => 'loadFloat',
        self::OP_BINFLOAT        => 'loadBinFloat',
        self::OP_STRING          => 'loadString',
        self::OP_BINSTRING       => 'loadBinString',
        self::OP_SHORT_BINSTRING => 'loadShortBinString',
        self::OP_BINBYTES        => 'loadBinBytes',
        self::OP_SHORT_BINBYTES  => 'loadShortBinBytes',
        self::OP_UNICODE         => 'loadUnicode',
        self::OP_BINUNICODE      => 'loadBinUnicode',
        self::OP_MARK            => 'loadMark',
        self::OP_LIST            => 'loadList',
        self::OP_EMPTY_LIST      => 'loadEmptyList',
        self::OP_APPEND          => 'loadAppend',
        self::OP_APPENDS         => 'loadAppends',
        self::OP_DICT            => 'loadDict',
        self::OP_EMPTY_DICT      => '_loadEmptyDict',
        self::OP_SETITEM         => 'loadSetItem',
        self::OP_SETITEMS        => 'loadSetItems',
        self::OP_TUPLE           => 'loadTuple',
        self::OP_EMPTY_TUPLE     => 'loadEmptyTuple',
        self::OP_TUPLE1          => 'loadTuple1',
        self::OP_TUPLE2          => 'loadTuple2',
        self::OP_TUPLE3          => 'loadTuple3',
        self::OP_PROTO           => 'loadProto',
    );

    // process vars (reset by clearProcessVars(), except $protocol and $marker)
    protected $protocol  = null;    // protocol in use for the current (un)serialize run
    protected $memo      = array(); // memo table: id => value
    protected $pickle    = '';      // pickle being written or read
    protected $pickleLen = 0;       // byte length of the pickle being read
    protected $pos       = 0;       // read cursor into $pickle
    protected $stack     = array(); // unpickler stack; kept a gap-free list (push with [], pop with array_pop)
    protected $marker    = null;    // unique object standing for MARK on the stack

    /**
     * @var BigInteger\Adapter\AdapterInterface
     */
    protected $bigIntegerAdapter = null;

    /**
     * @var PythonPickleOptions
     */
    protected $options = null;

    /**
     * Constructor.
     *
     * Detects host endianness once per process and creates the stack marker.
     *
     * @param  array|Traversable|PythonPickleOptions $options Optional
     */
    public function __construct($options = null)
    {
        // init
        if (static::$isLittleEndian === null) {
            static::$isLittleEndian = (pack('l', 1) === "\x01\x00\x00\x00");
        }

        $this->marker = new stdClass();

        parent::__construct($options);
    }

    /**
     * Set options
     *
     * @param  array|Traversable|PythonPickleOptions $options
     * @return PythonPickle
     */
    public function setOptions($options)
    {
        if (!$options instanceof PythonPickleOptions) {
            $options = new PythonPickleOptions($options);
        }

        $this->options = $options;
        return $this;
    }

    /**
     * Get options (lazily created with defaults)
     *
     * @return PythonPickleOptions
     */
    public function getOptions()
    {
        if ($this->options === null) {
            $this->options = new PythonPickleOptions();
        }
        return $this->options;
    }

    /* serialize */

    /**
     * Serialize PHP to PythonPickle format
     *
     * @param  mixed $value
     * @return string
     * @throws Exception\RuntimeException on invalid or unrecognized value type
     */
    public function serialize($value)
    {
        $this->clearProcessVars();
        $this->protocol = $this->getOptions()->getProtocol();

        // write
        if ($this->protocol >= 2) {
            $this->writeProto($this->protocol);
        }
        $this->write($value);
        $this->writeStop();

        $pickle = $this->pickle;
        $this->clearProcessVars();

        return $pickle;
    }

    /**
     * Write a value, dispatching on its PHP type
     *
     * @param  mixed $value
     * @throws Exception\RuntimeException on invalid or unrecognized value type
     */
    protected function write($value)
    {
        if ($value === null) {
            $this->writeNull();
        } elseif (is_bool($value)) {
            $this->writeBool($value);
        } elseif (is_int($value)) {
            $this->writeInt($value);
        } elseif (is_float($value)) {
            $this->writeFloat($value);
        } elseif (is_string($value)) {
            // TODO: write unicode / binary
            $this->writeString($value);
        } elseif (is_array($value)) {
            if (ArrayUtils::isList($value)) {
                $this->writeArrayList($value);
            } else {
                $this->writeArrayDict($value);
            }
        } elseif (is_object($value)) {
            $this->writeObject($value);
        } else {
            throw new Exception\RuntimeException(sprintf(
                'PHP-Type "%s" can not be serialized by %s',
                gettype($value),
                get_class($this)
            ));
        }
    }

    /**
     * Write pickle protocol
     *
     * The PROTO argument is a single raw byte (see loadProto()), so the
     * version number must not be appended as a decimal digit.
     *
     * @param int $protocol
     */
    protected function writeProto($protocol)
    {
        $this->pickle .= self::OP_PROTO . chr((int) $protocol);
    }

    /**
     * Write a get
     *
     * @param int $id Id of memo
     */
    protected function writeGet($id)
    {
        if ($this->protocol == 0) {
            $this->pickle .= self::OP_GET . $id . "\r\n";
        } elseif ($id <= 0xFF) {
            // BINGET + chr(i)
            $this->pickle .= self::OP_BINGET . chr($id);
        } else {
            // LONG_BINGET + pack("<i", i)
            $this->pickle .= self::OP_LONG_BINGET . $this->packInt32($id);
        }
    }

    /**
     * Write a put
     *
     * @param int $id Id of memo
     */
    protected function writePut($id)
    {
        if ($this->protocol == 0) {
            $this->pickle .= self::OP_PUT . $id . "\r\n";
        } elseif ($id <= 0xff) {
            // BINPUT + chr(i)
            $this->pickle .= self::OP_BINPUT . chr($id);
        } else {
            // LONG_BINPUT + pack("<i", i)
            $this->pickle .= self::OP_LONG_BINPUT . $this->packInt32($id);
        }
    }

    /**
     * Write a null as None
     */
    protected function writeNull()
    {
        $this->pickle .= self::OP_NONE;
    }

    /**
     * Write boolean value
     *
     * @param bool $value
     */
    protected function writeBool($value)
    {
        if ($this->protocol >= 2) {
            $this->pickle .= ($value === true) ? self::OP_NEWTRUE : self::OP_NEWFALSE;
        } else {
            $this->pickle .= self::OP_INT . (($value === true) ? '01' : '00') . "\r\n";
        }
    }

    /**
     * Write an integer value
     *
     * Binary protocols use the shortest of BININT1 / BININT2 / BININT that
     * fits. Anything outside the signed 32-bit range, and every integer in
     * protocol 0, is written in the text INT form, mirroring the fallback
     * of Python's own pickler.
     *
     * @param int $value
     */
    protected function writeInt($value)
    {
        if ($this->protocol != 0) {
            if ($value >= 0 && $value <= 0xFF) {
                // self.write(BININT1 + chr(obj))
                $this->pickle .= self::OP_BININT1 . chr($value);
                return;
            }

            if ($value >= 0 && $value <= 0xFFFF) {
                // self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
                $this->pickle .= self::OP_BININT2 . pack('v', $value);
                return;
            }

            // Next check for 4-byte signed ints:
            $highBits = $value >> 31; // the shift sign-extends
            if ($highBits == 0 || $highBits == -1) {
                // All high bits are copies of bit 2**31, so the value
                // fits in a 4-byte signed int.
                // self.write(BININT + pack("<i", obj))
                $this->pickle .= self::OP_BININT . $this->packInt32($value);
                return;
            }
        }

        // Text pickle, or int too big to fit in signed 4-byte format.
        $this->pickle .= self::OP_INT . $value . "\r\n";
    }

    /**
     * Write a float value
     *
     * @param float $value
     */
    protected function writeFloat($value)
    {
        if ($this->protocol == 0) {
            $this->pickle .= self::OP_FLOAT . $value . "\r\n";
        } else {
            // self.write(BINFLOAT + pack('>d', obj))
            $bin = pack('d', $value);
            if (static::$isLittleEndian === true) {
                // BINFLOAT is big-endian; pack('d') uses host order
                $bin = strrev($bin);
            }
            $this->pickle .= self::OP_BINFLOAT . $bin;
        }
    }

    /**
     * Write a string value
     *
     * A string already present in the memo is written as a memo GET.
     *
     * @param string $value
     */
    protected function writeString($value)
    {
        if (($id = $this->searchMemo($value)) !== false) {
            $this->writeGet($id);
            return;
        }

        if ($this->protocol == 0) {
            $this->pickle .= self::OP_STRING . $this->quoteString($value) . "\r\n";
        } else {
            $n = strlen($value);
            if ($n <= 0xFF) {
                // self.write(SHORT_BINSTRING + chr(n) + obj)
                $this->pickle .= self::OP_SHORT_BINSTRING . chr($n) . $value;
            } else {
                // self.write(BINSTRING + pack("<i", n) + obj)
                $this->pickle .= self::OP_BINSTRING . $this->packInt32($n) . $value;
            }
        }

        $this->memorize($value);
    }

    /**
     * Write an associative array value as dictionary
     *
     * Emits an empty dict, memoizes it, then adds each pair with SETITEM.
     *
     * @param array|Traversable $value
     */
    protected function writeArrayDict($value)
    {
        if (($id = $this->searchMemo($value)) !== false) {
            $this->writeGet($id);
            return;
        }

        $this->pickle .= self::OP_MARK . self::OP_DICT;
        $this->memorize($value);

        foreach ($value as $k => $v) {
            $this->write($k);
            $this->write($v);
            $this->pickle .= self::OP_SETITEM;
        }
    }

    /**
     * Write a simple array value as list
     *
     * Emits an empty list, memoizes it, then adds each element with APPEND.
     *
     * @param array $value
     */
    protected function writeArrayList(array $value)
    {
        if (($id = $this->searchMemo($value)) !== false) {
            $this->writeGet($id);
            return;
        }

        $this->pickle .= self::OP_MARK . self::OP_LIST;
        $this->memorize($value);

        foreach ($value as $v) {
            $this->write($v);
            $this->pickle .= self::OP_APPEND;
        }
    }

    /**
     * Write an object as a dictionary (or list for SplFixedArray)
     *
     * @param object $value
     */
    protected function writeObject($value)
    {
        // The main differences between a SplFixedArray and a normal PHP array is
        // that the SplFixedArray is of fixed length and allows only integers
        // within the range as indexes.
        if ($value instanceof \SplFixedArray) {
            $this->writeArrayList($value->toArray());

        // Use the object method toArray if available
        } elseif (method_exists($value, 'toArray')) {
            $this->writeArrayDict($value->toArray());

        // If the object is an iterator simply iterate it
        // and convert it to a dictionary
        } elseif ($value instanceof Traversable) {
            $this->writeArrayDict($value);

        // other objects are simply converted by using its properties
        } else {
            $this->writeArrayDict(get_object_vars($value));
        }
    }

    /**
     * Write stop
     */
    protected function writeStop()
    {
        $this->pickle .= self::OP_STOP;
    }

    /* serialize helper */

    /**
     * Add a value to the memo and write the id
     *
     * @param mixed $value
     */
    protected function memorize($value)
    {
        $id = count($this->memo);
        $this->memo[$id] = $value;
        $this->writePut($id);
    }

    /**
     * Search a value in the memo (strict comparison) and return the id
     *
     * @param  mixed $value
     * @return int|bool The id or false
     */
    protected function searchMemo($value)
    {
        return array_search($value, $this->memo, true);
    }

    /**
     * Quote/Escape a string
     *
     * @param  string $str
     * @return string quoted string
     */
    protected function quoteString($str)
    {
        $quoteArr = static::$quoteString;

        if (($cntSingleQuote = substr_count($str, "'"))
            && ($cntDoubleQuote = substr_count($str, '"'))
            && ($cntSingleQuote < $cntDoubleQuote)
        ) {
            $quoteArr['"'] = '\\"';
            $enclosure     = '"';
        } else {
            $quoteArr["'"] = "\\'";
            $enclosure     = "'";
        }

        return $enclosure . strtr($str, $quoteArr) . $enclosure;
    }

    /**
     * Pack an integer as 4-byte little-endian signed int (struct "<i")
     *
     * @param  int $value
     * @return string 4 bytes
     */
    protected function packInt32($value)
    {
        $bin = pack('l', $value); // host byte order
        if (static::$isLittleEndian === false) {
            $bin = strrev($bin);
        }
        return $bin;
    }

    /* unserialize */

    /**
     * Unserialize from Python Pickle format to PHP
     *
     * @param  string $pickle
     * @return mixed
     * @throws Exception\RuntimeException on invalid Pickle string
     */
    public function unserialize($pickle)
    {
        // init process vars
        $this->clearProcessVars();
        $this->pickle    = $pickle;
        $this->pickleLen = strlen($this->pickle);

        // read pickle string; read() throws once the data ends without STOP
        while (($op = $this->read(1)) !== self::OP_STOP) {
            $this->load($op);
        }

        if (!count($this->stack)) {
            throw new Exception\RuntimeException('No data found');
        }

        $ret = array_pop($this->stack);

        // clear process vars
        $this->clearProcessVars();

        return $ret;
    }

    /**
     * Clear temp variables needed for processing
     */
    protected function clearProcessVars()
    {
        $this->pos       = 0;
        $this->pickle    = '';
        $this->pickleLen = 0;
        $this->memo      = array();
        $this->stack     = array();
    }

    /**
     * Load a pickle opcode
     *
     * @param  string $op
     * @throws Exception\RuntimeException on invalid opcode
     */
    protected function load($op)
    {
        if (!isset(static::$opcodeLoaders[$op])) {
            throw new Exception\RuntimeException("Invalid or unknown opcode '{$op}'");
        }

        $method = static::$opcodeLoaders[$op];
        $this->{$method}();
    }

    /**
     * Load a PUT opcode
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadPut()
    {
        $this->memoizeTop((int) $this->readline());
    }

    /**
     * Load a binary PUT
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadBinPut()
    {
        $this->memoizeTop(ord($this->read(1)));
    }

    /**
     * Load a long binary PUT
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadLongBinPut()
    {
        $this->memoizeTop($this->readInt32());
    }

    /**
     * Load a GET operation
     *
     * @throws Exception\RuntimeException on missing GET identifier
     */
    protected function loadGet()
    {
        $this->pushMemo((int) $this->readline());
    }

    /**
     * Load a binary GET operation
     *
     * @throws Exception\RuntimeException on missing GET identifier
     */
    protected function loadBinGet()
    {
        $this->pushMemo(ord($this->read(1)));
    }

    /**
     * Load a long binary GET operation
     *
     * @throws Exception\RuntimeException on missing GET identifier
     */
    protected function loadLongBinGet()
    {
        $this->pushMemo($this->readInt32());
    }

    /**
     * Load a NONE operator
     */
    protected function loadNone()
    {
        $this->stack[] = null;
    }

    /**
     * Load a boolean TRUE operator
     */
    protected function loadNewTrue()
    {
        $this->stack[] = true;
    }

    /**
     * Load a boolean FALSE operator
     */
    protected function loadNewFalse()
    {
        $this->stack[] = false;
    }

    /**
     * Load an integer operator (text form; "01"/"00" encode booleans)
     */
    protected function loadInt()
    {
        $line = $this->readline();
        if ($line === '01') {
            $this->stack[] = true;
        } elseif ($line === '00') {
            $this->stack[] = false;
        } else {
            $this->stack[] = (int) $line;
        }
    }

    /**
     * Load a 4-byte signed binary integer
     */
    protected function loadBinInt()
    {
        $this->stack[] = $this->readInt32();
    }

    /**
     * Load a 1-byte unsigned binary integer
     */
    protected function loadBinInt1()
    {
        $this->stack[] = ord($this->read(1));
    }

    /**
     * Load a 2-byte unsigned little-endian binary integer
     */
    protected function loadBinInt2()
    {
        $bin = $this->read(2);
        list(, $int)   = unpack('v', $bin);
        $this->stack[] = $int;
    }

    /**
     * Load a long given as decimal text
     *
     * The value is pushed as a numeric string (trailing "L" removed); an
     * empty argument is pushed as integer 0.
     */
    protected function loadLong()
    {
        $data = rtrim($this->readline(), 'L');
        if ($data === '') {
            $this->stack[] = 0;
        } else {
            $this->stack[] = $data;
        }
    }

    /**
     * Load a long whose byte count is given by a 1-byte length
     */
    protected function loadLong1()
    {
        $n    = ord($this->read(1));
        $data = $this->read($n);
        $this->stack[] = $this->decodeBinLong($data);
    }

    /**
     * Load a long whose byte count is given by a 4-byte length
     */
    protected function loadLong4()
    {
        $n    = $this->readInt32();
        $data = $this->read($n);

        $this->stack[] = $this->decodeBinLong($data);
    }

    /**
     * Load a float value (text form)
     */
    protected function loadFloat()
    {
        $float = (float) $this->readline();
        $this->stack[] = $float;
    }

    /**
     * Load a binary float value (8-byte big-endian double)
     */
    protected function loadBinFloat()
    {
        $bin = $this->read(8);
        if (static::$isLittleEndian === true) {
            // unpack('d') expects host byte order
            $bin = strrev($bin);
        }
        list(, $float) = unpack('d', $bin);
        $this->stack[] = $float;
    }

    /**
     * Load a quoted string
     */
    protected function loadString()
    {
        $this->stack[] = $this->unquoteString((string) $this->readline());
    }

    /**
     * Load a binary string with 4-byte length
     */
    protected function loadBinString()
    {
        $len = $this->readInt32();
        $this->stack[] = (string) $this->read($len);
    }

    /**
     * Load a short binary string with 1-byte length
     */
    protected function loadShortBinString()
    {
        $len = ord($this->read(1));
        $this->stack[] = (string) $this->read($len);
    }

    /**
     * Load arbitrary binary bytes with 4-byte length
     */
    protected function loadBinBytes()
    {
        $n = $this->readInt32();
        $this->stack[] = $this->read($n);
    }

    /**
     * Load binary bytes with 1-byte length
     */
    protected function loadShortBinBytes()
    {
        $n = ord($this->read(1));
        $this->stack[] = $this->read($n);
    }

    /**
     * Load a unicode string, converting \uXXXX escapes to UTF-8
     */
    protected function loadUnicode()
    {
        $data    = $this->readline();
        $pattern = '/\\\\u([a-fA-F0-9]{4})/u'; // \uXXXX
        $data    = preg_replace_callback($pattern, array($this, '_convertMatchingUnicodeSequence2Utf8'), $data);

        $this->stack[] = $data;
    }

    /**
     * Convert a unicode sequence to UTF-8
     *
     * @param  array $match
     * @return string
     */
    protected function _convertMatchingUnicodeSequence2Utf8(array $match)
    {
        return $this->hex2Utf8($match[1]);
    }

    /**
     * Convert a hex code point to a UTF-8 string
     *
     * @param  string $hex
     * @return string
     * @throws Exception\RuntimeException on unmatched unicode sequence
     */
    protected function hex2Utf8($hex)
    {
        $uniCode = hexdec($hex);

        if ($uniCode < 0x80) { // 1Byte
            $utf8Char = chr($uniCode);
        } elseif ($uniCode < 0x800) { // 2Byte
            $utf8Char = chr(0xC0 | ($uniCode >> 6))
                      . chr(0x80 | ($uniCode & 0x3F));
        } elseif ($uniCode < 0x10000) { // 3Byte
            $utf8Char = chr(0xE0 | ($uniCode >> 12))
                      . chr(0x80 | (($uniCode >> 6) & 0x3F))
                      . chr(0x80 | ($uniCode & 0x3F));
        } elseif ($uniCode < 0x110000) { // 4Byte
            $utf8Char = chr(0xF0 | ($uniCode >> 18))
                      . chr(0x80 | (($uniCode >> 12) & 0x3F))
                      . chr(0x80 | (($uniCode >> 6) & 0x3F))
                      . chr(0x80 | ($uniCode & 0x3F));
        } else {
            throw new Exception\RuntimeException(
                sprintf('Unsupported unicode character found "%s"', dechex($uniCode))
            );
        }

        return $utf8Char;
    }

    /**
     * Load binary unicode sequence (UTF-8 bytes with 4-byte length)
     */
    protected function loadBinUnicode()
    {
        $n    = $this->readInt32();
        $data = $this->read($n);

        $this->stack[] = $data;
    }

    /**
     * Load a marker sequence
     */
    protected function loadMark()
    {
        $this->stack[] = $this->marker;
    }

    /**
     * Load an array (list) built from all items above the last marker
     *
     * @throws Exception\RuntimeException if there is no marker on the stack
     */
    protected function loadList()
    {
        $this->stack[] = $this->popMark();
    }

    /**
     * Load an append (to list) sequence
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadAppend()
    {
        $value  = $this->pop();
        $list   =& $this->stack[$this->topIndex()];
        $list[] = $value;
    }

    /**
     * Load an empty list sequence
     */
    protected function loadEmptyList()
    {
        $this->stack[] = array();
    }

    /**
     * Load multiple append (to list) sequences at once
     *
     * @throws Exception\RuntimeException on missing marker or stack
     */
    protected function loadAppends()
    {
        $items = $this->popMark();
        $list  =& $this->stack[$this->topIndex()];
        foreach ($items as $item) {
            $list[] = $item;
        }
    }

    /**
     * Load an associative array (Python dictionary) built from the
     * key/value pairs above the last marker
     *
     * @throws Exception\RuntimeException on missing marker or unpaired key
     */
    protected function loadDict()
    {
        $dict = array();
        $this->setItemsOn($dict, $this->popMark());
        $this->stack[] = $dict;
    }

    /**
     * Load a SETITEM: add the key/value pair on top of the stack to the
     * dictionary below it
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadSetItem()
    {
        $value = $this->pop();
        $key   = $this->pop();
        $dict  =& $this->stack[$this->topIndex()];
        $dict[$key] = $value;
    }

    /**
     * Load an empty dictionary
     */
    protected function _loadEmptyDict()
    {
        $this->stack[] = array();
    }

    /**
     * Load set items: add all key/value pairs above the last marker to the
     * dictionary below the marker
     *
     * @throws Exception\RuntimeException on missing marker/stack or unpaired key
     */
    protected function loadSetItems()
    {
        $items = $this->popMark();
        $this->setItemsOn($this->stack[$this->topIndex()], $items);
    }

    /**
     * Load a tuple built from all items above the last marker
     *
     * @throws Exception\RuntimeException if there is no marker on the stack
     */
    protected function loadTuple()
    {
        $this->stack[] = $this->popMark();
    }

    /**
     * Load an empty tuple
     */
    protected function loadEmptyTuple()
    {
        $this->stack[] = array();
    }

    /**
     * Load single item tuple
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadTuple1()
    {
        $value1 = $this->pop();
        $this->stack[] = array($value1);
    }

    /**
     * Load two item tuple
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadTuple2()
    {
        $value2 = $this->pop();
        $value1 = $this->pop();
        $this->stack[] = array($value1, $value2);
    }

    /**
     * Load three item tuple
     *
     * @throws Exception\RuntimeException on missing stack
     */
    protected function loadTuple3()
    {
        $value3 = $this->pop();
        $value2 = $this->pop();
        $value1 = $this->pop();
        $this->stack[] = array($value1, $value2, $value3);
    }

    /**
     * Load a proto value
     *
     * @throws Exception\RuntimeException if Pickle version does not support this feature
     */
    protected function loadProto()
    {
        $proto = ord($this->read(1));
        if ($proto < 2 || $proto > 3) {
            throw new Exception\RuntimeException(
                "Invalid or unknown protocol version '{$proto}' detected"
            );
        }
        $this->protocol = $proto;
    }

    /* unserialize helper */

    /**
     * Read a segment of the pickle
     *
     * @param  int $len Number of bytes to read
     * @return string
     * @throws Exception\RuntimeException on negative length or if the segment runs past the end of data
     */
    protected function read($len)
    {
        // A corrupt 4-byte length prefix can decode to a negative number,
        // which would otherwise move the cursor backwards.
        if ($len < 0) {
            throw new Exception\RuntimeException('Invalid negative read length');
        }

        if (($this->pos + $len) > $this->pickleLen) {
            throw new Exception\RuntimeException('End of data');
        }

        $this->pos += $len;
        return substr($this->pickle, ($this->pos - $len), $len);
    }

    /**
     * Read a line of the pickle at once
     *
     * A line ends at the first "\n"; a "\r" directly before it is treated as
     * part of the line ending.
     *
     * @return string
     * @throws Exception\RuntimeException if no EOL character found
     */
    protected function readline()
    {
        $eolPos = strpos($this->pickle, "\n", $this->pos);
        if ($eolPos === false) {
            throw new Exception\RuntimeException('No new line found');
        }

        $ret = (string) substr($this->pickle, $this->pos, $eolPos - $this->pos);
        if ($ret !== '' && substr($ret, -1) === "\r") {
            $ret = (string) substr($ret, 0, -1);
        }
        $this->pos = $eolPos + 1;

        return $ret;
    }

    /**
     * Read a 4-byte little-endian signed int (struct "<i")
     *
     * @return int
     * @throws Exception\RuntimeException if fewer than 4 bytes are left
     */
    protected function readInt32()
    {
        $bin = $this->read(4);
        if (static::$isLittleEndian === false) {
            // unpack('l') expects host byte order
            $bin = strrev($bin);
        }
        list(, $int) = unpack('l', $bin);

        return $int;
    }

    /**
     * Pop the topmost stack item
     *
     * array_pop() keeps the stack a gap-free list, which lastMarker() and
     * topIndex() rely on.
     *
     * @return mixed
     * @throws Exception\RuntimeException on empty stack
     */
    protected function pop()
    {
        if (!$this->stack) {
            throw new Exception\RuntimeException('No stack exist');
        }

        return array_pop($this->stack);
    }

    /**
     * Return the index of the topmost stack item
     *
     * @return int
     * @throws Exception\RuntimeException on empty stack
     */
    protected function topIndex()
    {
        $top = count($this->stack) - 1;
        if ($top < 0) {
            throw new Exception\RuntimeException('No stack exist');
        }

        return $top;
    }

    /**
     * Remove the last marker and everything above it from the stack
     *
     * @return array The removed items above the marker, in stack order
     * @throws Exception\RuntimeException if there is no marker on the stack
     */
    protected function popMark()
    {
        $k = $this->lastMarker();
        if ($k < 0) {
            throw new Exception\RuntimeException('No marker found on stack');
        }

        $items = array();
        for ($i = count($this->stack) - 1; $i > $k; $i--) {
            $items[] = array_pop($this->stack);
        }
        array_pop($this->stack); // the marker itself

        return array_reverse($items);
    }

    /**
     * Bind a memo slot to the topmost stack item
     *
     * The slot is bound by reference so that items appended to a list/dict
     * after it was memoized are visible through the memo as well.
     *
     * @param  int $id Id of memo
     * @throws Exception\RuntimeException on missing stack
     */
    protected function memoizeTop($id)
    {
        $this->memo[$id] =& $this->stack[$this->topIndex()];
    }

    /**
     * Push a memo entry (by reference) onto the stack
     *
     * @param  int $id Id of memo
     * @throws Exception\RuntimeException on missing GET identifier
     */
    protected function pushMemo($id)
    {
        if (!array_key_exists($id, $this->memo)) {
            throw new Exception\RuntimeException('Get id "' . $id . '" not found in memo');
        }
        $this->stack[] =& $this->memo[$id];
    }

    /**
     * Add alternating key/value items to a dictionary
     *
     * @param  array $dict  Dictionary to modify (by reference)
     * @param  array $items List of key, value, key, value, ...
     * @throws Exception\RuntimeException if a key has no value
     */
    protected function setItemsOn(&$dict, array $items)
    {
        $count = count($items);
        if ($count % 2 !== 0) {
            throw new Exception\RuntimeException('Odd number of items for dictionary');
        }

        for ($i = 0; $i < $count; $i += 2) {
            $dict[$items[$i]] = $items[$i + 1];
        }
    }

    /**
     * Unquote/Unescape a quoted string
     *
     * @param  string $str quoted string
     * @return string unquoted string
     */
    protected function unquoteString($str)
    {
        // Trim first so the enclosure is detected on the same text it is
        // stripped from; an empty argument has nothing to unquote.
        $str = trim($str);
        if ($str === '') {
            return '';
        }

        $quoteArr = array_flip(static::$quoteString);

        if ($str[0] == '"') {
            $quoteArr['\\"'] = '"';
        } else {
            $quoteArr["\\'"] = "'";
        }

        return strtr((string) substr($str, 1, -1), $quoteArr);
    }

    /**
     * Return last marker position in stack
     *
     * @return int Index of the topmost marker, or -1 if the stack holds none
     */
    protected function lastMarker()
    {
        for ($k = count($this->stack) - 1; $k >= 0; $k -= 1) {
            if ($this->stack[$k] === $this->marker) {
                break;
            }
        }
        return $k;
    }

    /**
     * Decode a binary long sequence (little-endian two's complement)
     *
     * @param  string $data
     * @return int|float|string
     */
    protected function decodeBinLong($data)
    {
        $nbytes = strlen($data);

        if ($nbytes == 0) {
            return 0;
        }

        $long = 0;
        if ($nbytes > 7) {
            if ($this->bigIntegerAdapter === null) {
                $this->bigIntegerAdapter = BigInteger\BigInteger::getDefaultAdapter();
            }
            // The pickle stores the bytes little-endian regardless of the
            // host, so they are always reversed before conversion.
            // NOTE(review): assumes binToInt() expects most significant byte first - confirm.
            $long = $this->bigIntegerAdapter->binToInt(strrev($data), true);
        } else {
            for ($i = 0; $i < $nbytes; $i++) {
                $long += ord($data[$i]) * pow(256, $i);
            }
            if (0x80 <= ord($data[$nbytes - 1])) {
                // sign bit set: undo the unsigned interpretation
                $long -= pow(2, $nbytes * 8);
            }
        }

        return $long;
    }
}