1<?php
2/**
3 * Zend Framework (http://framework.zend.com/)
4 *
5 * @link      http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license   http://framework.zend.com/license/new-bsd New BSD License
8 */
9
10namespace Zend\Serializer\Adapter;
11
12use stdClass;
13use Traversable;
14use Zend\Math\BigInteger;
15use Zend\Serializer\Exception;
16use Zend\Stdlib\ArrayUtils;
17
18/**
19 * @link       http://www.python.org
 * @see        Python3.1/Lib/pickle.py
 * @see        Python3.1/Modules/_pickle.c
22 * @link       http://pickle-js.googlecode.com
23 */
24class PythonPickle extends AbstractAdapter
25{
26    /**
27     * Pickle opcodes. See pickletools.py for extensive docs.
28     * @link http://hg.python.org/cpython/file/2.7/Lib/pickletools.py
29     * The listing here is in kind-of alphabetical order of 1-character pickle code.
30     * pickletools groups them by purpose.
31     */
32    const OP_MARK            = '(';     // push special markobject on stack
33    const OP_STOP            = '.';     // every pickle ends with STOP
34    const OP_POP             = '0';     // discard topmost stack item
35    const OP_POP_MARK        = '1';     // discard stack top through topmost markobject
36    const OP_DUP             = '2';     // duplicate top stack item
37    const OP_FLOAT           = 'F';     // push float object; decimal string argument
38    const OP_INT             = 'I';     // push integer or bool; decimal string argument
39    const OP_BININT          = 'J';     // push four-byte signed int
40    const OP_BININT1         = 'K';     // push 1-byte unsigned int
41    const OP_LONG            = 'L';     // push long; decimal string argument
42    const OP_BININT2         = 'M';     // push 2-byte unsigned int
43    const OP_NONE            = 'N';     // push None
44    const OP_PERSID          = 'P';     // push persistent object; id is taken from string arg
45    const OP_BINPERSID       = 'Q';     //  "       "         "  ;  "  "   "     "  stack
46    const OP_REDUCE          = 'R';     // apply callable to argtuple, both on stack
47    const OP_STRING          = 'S';     // push string; NL-terminated string argument
48    const OP_BINSTRING       = 'T';     // push string; counted binary string argument
49    const OP_SHORT_BINSTRING = 'U';     //  "     "   ;    "      "       "      " < 256 bytes
50    const OP_UNICODE         = 'V';     // push Unicode string; raw-unicode-escaped'd argument
51    const OP_BINUNICODE      = 'X';     //   "     "       "  ; counted UTF-8 string argument
52    const OP_APPEND          = 'a';     // append stack top to list below it
53    const OP_BUILD           = 'b';     // call __setstate__ or __dict__.update()
54    const OP_GLOBAL          = 'c';     // push self.find_class(modname, name); 2 string args
55    const OP_DICT            = 'd';     // build a dict from stack items
56    const OP_EMPTY_DICT      = '}';     // push empty dict
57    const OP_APPENDS         = 'e';     // extend list on stack by topmost stack slice
58    const OP_GET             = 'g';     // push item from memo on stack; index is string arg
59    const OP_BINGET          = 'h';     //   "    "    "    "   "   "  ;   "    " 1-byte arg
60    const OP_INST            = 'i';     // build & push class instance
61    const OP_LONG_BINGET     = 'j';     // push item from memo on stack; index is 4-byte arg
62    const OP_LIST            = 'l';     // build list from topmost stack items
63    const OP_EMPTY_LIST      = ']';     // push empty list
64    const OP_OBJ             = 'o';     // build & push class instance
65    const OP_PUT             = 'p';     // store stack top in memo; index is string arg
66    const OP_BINPUT          = 'q';     //   "     "    "   "   " ;   "    " 1-byte arg
67    const OP_LONG_BINPUT     = 'r';     //   "     "    "   "   " ;   "    " 4-byte arg
68    const OP_SETITEM         = 's';     // add key+value pair to dict
69    const OP_TUPLE           = 't';     // build tuple from topmost stack items
70    const OP_EMPTY_TUPLE     = ')';     // push empty tuple
71    const OP_SETITEMS        = 'u';     // modify dict by adding topmost key+value pairs
72    const OP_BINFLOAT        = 'G';     // push float; arg is 8-byte float encoding
73
74    /* Protocol 2 */
75    const OP_PROTO           = "\x80";  // identify pickle protocol
76    const OP_NEWOBJ          = "\x81";  // build object by applying cls.__new__ to argtuple
77    const OP_EXT1            = "\x82";  // push object from extension registry; 1-byte index
78    const OP_EXT2            = "\x83";  // ditto, but 2-byte index
79    const OP_EXT4            = "\x84";  // ditto, but 4-byte index
80    const OP_TUPLE1          = "\x85";  // build 1-tuple from stack top
81    const OP_TUPLE2          = "\x86";  // build 2-tuple from two topmost stack items
82    const OP_TUPLE3          = "\x87";  // build 3-tuple from three topmost stack items
83    const OP_NEWTRUE         = "\x88";  // push True
84    const OP_NEWFALSE        = "\x89";  // push False
85    const OP_LONG1           = "\x8a";  // push long from < 256 bytes
86    const OP_LONG4           = "\x8b";  // push really big long
87
88    /* Protocol 3 (Python 3.x) */
89    const OP_BINBYTES        = 'B';     // push bytes; counted binary string argument
90    const OP_SHORT_BINBYTES  = 'C';     //  "     "   ;    "      "       "      " < 256 bytes
91
    /**
     * Whether or not the system is little-endian.
     * Stays null until the constructor has detected the byte order.
     *
     * @var bool|null
     */
97    protected static $isLittleEndian = null;
98
99    /**
100     * @var array Strings representing quotes
101     */
102    protected static $quoteString = array(
103        '\\' => '\\\\',
104        "\x00" => '\\x00', "\x01" => '\\x01', "\x02" => '\\x02', "\x03" => '\\x03',
105        "\x04" => '\\x04', "\x05" => '\\x05', "\x06" => '\\x06', "\x07" => '\\x07',
106        "\x08" => '\\x08', "\x09" => '\\t',   "\x0a" => '\\n',   "\x0b" => '\\x0b',
107        "\x0c" => '\\x0c', "\x0d" => '\\r',   "\x0e" => '\\x0e', "\x0f" => '\\x0f',
108        "\x10" => '\\x10', "\x11" => '\\x11', "\x12" => '\\x12', "\x13" => '\\x13',
109        "\x14" => '\\x14', "\x15" => '\\x15', "\x16" => '\\x16', "\x17" => '\\x17',
110        "\x18" => '\\x18', "\x19" => '\\x19', "\x1a" => '\\x1a', "\x1b" => '\\x1b',
111        "\x1c" => '\\x1c', "\x1d" => '\\x1d', "\x1e" => '\\x1e', "\x1f" => '\\x1f',
112        "\xff" => '\\xff'
113    );
114
115    // process vars
116    protected $protocol  = null;
117    protected $memo      = array();
118    protected $pickle    = '';
119    protected $pickleLen = 0;
120    protected $pos       = 0;
121    protected $stack     = array();
122    protected $marker    = null;
123
124    /**
125     * @var BigInteger\Adapter\AdapterInterface
126     */
127    protected $bigIntegerAdapter = null;
128
129    /**
130     * @var PythonPickleOptions
131     */
132    protected $options = null;
133
134    /**
135     * Constructor.
136     *
137     * @param  array|Traversable|PythonPickleOptions $options Optional
138     */
139    public function __construct($options = null)
140    {
141        // init
142        if (static::$isLittleEndian === null) {
143            static::$isLittleEndian = (pack('l', 1) === "\x01\x00\x00\x00");
144        }
145
146        $this->marker = new stdClass();
147
148        parent::__construct($options);
149    }
150
151    /**
152     * Set options
153     *
154     * @param  array|Traversable|PythonPickleOptions $options
155     * @return PythonPickle
156     */
157    public function setOptions($options)
158    {
159        if (!$options instanceof PythonPickleOptions) {
160            $options = new PythonPickleOptions($options);
161        }
162
163        $this->options = $options;
164        return $this;
165    }
166
167    /**
168     * Get options
169     *
170     * @return PythonPickleOptions
171     */
172    public function getOptions()
173    {
174        if ($this->options === null) {
175            $this->options = new PythonPickleOptions();
176        }
177        return $this->options;
178    }
179
180    /* serialize */
181
182    /**
183     * Serialize PHP to PythonPickle format
184     *
185     * @param  mixed $value
186     * @return string
187     */
188    public function serialize($value)
189    {
190        $this->clearProcessVars();
191        $this->protocol = $this->getOptions()->getProtocol();
192
193        // write
194        if ($this->protocol >= 2) {
195            $this->writeProto($this->protocol);
196        }
197        $this->write($value);
198        $this->writeStop();
199
200        $pickle = $this->pickle;
201        $this->clearProcessVars();
202
203        return $pickle;
204    }
205
206    /**
207     * Write a value
208     *
209     * @param  mixed $value
210     * @throws Exception\RuntimeException on invalid or unrecognized value type
211     */
212    protected function write($value)
213    {
214        if ($value === null) {
215            $this->writeNull();
216        } elseif (is_bool($value)) {
217            $this->writeBool($value);
218        } elseif (is_int($value)) {
219            $this->writeInt($value);
220        } elseif (is_float($value)) {
221            $this->writeFloat($value);
222        } elseif (is_string($value)) {
223            // TODO: write unicode / binary
224            $this->writeString($value);
225        } elseif (is_array($value)) {
226            if (ArrayUtils::isList($value)) {
227                $this->writeArrayList($value);
228            } else {
229                $this->writeArrayDict($value);
230            }
231        } elseif (is_object($value)) {
232            $this->writeObject($value);
233        } else {
234            throw new Exception\RuntimeException(sprintf(
235                'PHP-Type "%s" can not be serialized by %s',
236                gettype($value),
237                get_class($this)
238            ));
239        }
240    }
241
242    /**
243     * Write pickle protocol
244     *
245     * @param int $protocol
246     */
247    protected function writeProto($protocol)
248    {
249        $this->pickle .= self::OP_PROTO . $protocol;
250    }
251
252    /**
253     * Write a get
254     *
255     * @param  int $id Id of memo
256     */
257    protected function writeGet($id)
258    {
259        if ($this->protocol == 0) {
260            $this->pickle .= self::OP_GET . $id . "\r\n";
261        } elseif ($id <= 0xFF) {
262            // BINGET + chr(i)
263            $this->pickle .= self::OP_BINGET . chr($id);
264        } else {
265            // LONG_BINGET + pack("<i", i)
266            $bin = pack('l', $id);
267            if (static::$isLittleEndian === false) {
268                $bin = strrev($bin);
269            }
270            $this->pickle .= self::OP_LONG_BINGET . $bin;
271        }
272    }
273
274    /**
275     * Write a put
276     *
277     * @param  int $id Id of memo
278     */
279    protected function writePut($id)
280    {
281        if ($this->protocol == 0) {
282            $this->pickle .= self::OP_PUT . $id . "\r\n";
283        } elseif ($id <= 0xff) {
284            // BINPUT + chr(i)
285            $this->pickle .= self::OP_BINPUT . chr($id);
286        } else {
287            // LONG_BINPUT + pack("<i", i)
288            $bin = pack('l', $id);
289            if (static::$isLittleEndian === false) {
290                $bin = strrev($bin);
291            }
292            $this->pickle .= self::OP_LONG_BINPUT . $bin;
293        }
294    }
295
296    /**
297     * Write a null as None
298     *
299     */
300    protected function writeNull()
301    {
302        $this->pickle .= self::OP_NONE;
303    }
304
305    /**
306     * Write boolean value
307     *
308     * @param bool $value
309     */
310    protected function writeBool($value)
311    {
312        if ($this->protocol >= 2) {
313            $this->pickle .= ($value === true) ? self::OP_NEWTRUE : self::OP_NEWFALSE;
314        } else {
315            $this->pickle .= self::OP_INT . (($value === true) ? '01' : '00') . "\r\n";
316        }
317    }
318
319    /**
320     * Write an integer value
321     *
322     * @param  int $value
323     */
324    protected function writeInt($value)
325    {
326        if ($this->protocol == 0) {
327            $this->pickle .= self::OP_INT . $value . "\r\n";
328            return;
329        }
330
331        if ($value >= 0) {
332            if ($value <= 0xFF) {
333                // self.write(BININT1 + chr(obj))
334                $this->pickle .= self::OP_BININT1 . chr($value);
335            } elseif ($value <= 0xFFFF) {
336                // self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
337                $this->pickle .= self::OP_BININT2 . pack('v', $value);
338            }
339            return;
340        }
341
342        // Next check for 4-byte signed ints:
343        $highBits = $value >> 31;  // note that Python shift sign-extends
344        if ($highBits == 0 || $highBits == -1) {
345            // All high bits are copies of bit 2**31, so the value
346            // fits in a 4-byte signed int.
347            // self.write(BININT + pack("<i", obj))
348            $bin = pack('l', $value);
349            if (static::$isLittleEndian === false) {
350                $bin = strrev($bin);
351            }
352            $this->pickle .= self::OP_BININT . $bin;
353            return;
354        }
355    }
356
357    /**
358     * Write a float value
359     *
360     * @param  float $value
361     */
362    protected function writeFloat($value)
363    {
364        if ($this->protocol == 0) {
365            $this->pickle .= self::OP_FLOAT . $value . "\r\n";
366        } else {
367            // self.write(BINFLOAT + pack('>d', obj))
368            $bin = pack('d', $value);
369            if (static::$isLittleEndian === true) {
370                $bin = strrev($bin);
371            }
372            $this->pickle .= self::OP_BINFLOAT . $bin;
373        }
374    }
375
376    /**
377     * Write a string value
378     *
379     * @param  string $value
380     */
381    protected function writeString($value)
382    {
383        if (($id = $this->searchMemo($value)) !== false) {
384            $this->writeGet($id);
385            return;
386        }
387
388        if ($this->protocol == 0) {
389            $this->pickle .= self::OP_STRING . $this->quoteString($value) . "\r\n";
390        } else {
391            $n = strlen($value);
392            if ($n <= 0xFF) {
393                // self.write(SHORT_BINSTRING + chr(n) + obj)
394                $this->pickle .= self::OP_SHORT_BINSTRING . chr($n) . $value;
395            } else {
396                // self.write(BINSTRING + pack("<i", n) + obj)
397                $binLen = pack('l', $n);
398                if (static::$isLittleEndian === false) {
399                    $binLen = strrev($binLen);
400                }
401                $this->pickle .= self::OP_BINSTRING . $binLen . $value;
402            }
403        }
404
405        $this->memorize($value);
406    }
407
408    /**
409     * Write an associative array value as dictionary
410     *
411     * @param  array|Traversable $value
412     */
413    protected function writeArrayDict($value)
414    {
415        if (($id = $this->searchMemo($value)) !== false) {
416            $this->writeGet($id);
417            return;
418        }
419
420        $this->pickle .= self::OP_MARK . self::OP_DICT;
421        $this->memorize($value);
422
423        foreach ($value as $k => $v) {
424            $this->write($k);
425            $this->write($v);
426            $this->pickle .= self::OP_SETITEM;
427        }
428    }
429
430    /**
431     * Write a simple array value as list
432     *
433     * @param  array $value
434     */
435    protected function writeArrayList(array $value)
436    {
437        if (($id = $this->searchMemo($value)) !== false) {
438            $this->writeGet($id);
439            return;
440        }
441
442        $this->pickle .= self::OP_MARK . self::OP_LIST;
443        $this->memorize($value);
444
445        foreach ($value as $v) {
446            $this->write($v);
447            $this->pickle .= self::OP_APPEND;
448        }
449    }
450
451    /**
452     * Write an object as a dictionary
453     *
454     * @param  object $value
455     */
456    protected function writeObject($value)
457    {
458        // The main differences between a SplFixedArray and a normal PHP array is
459        // that the SplFixedArray is of fixed length and allows only integers
460        // within the range as indexes.
461        if ($value instanceof \SplFixedArray) {
462            $this->writeArrayList($value->toArray());
463
464        // Use the object method toArray if available
465        } elseif (method_exists($value, 'toArray')) {
466            $this->writeArrayDict($value->toArray());
467
468        // If the object is an iterator simply iterate it
469        // and convert it to a dictionary
470        } elseif ($value instanceof Traversable) {
471            $this->writeArrayDict($value);
472
473        // other objects are simply converted by using its properties
474        } else {
475            $this->writeArrayDict(get_object_vars($value));
476        }
477    }
478
479    /**
480     * Write stop
481     */
482    protected function writeStop()
483    {
484        $this->pickle .= self::OP_STOP;
485    }
486
487    /* serialize helper */
488
489    /**
490     * Add a value to the memo and write the id
491     *
492     * @param mixed $value
493     */
494    protected function memorize($value)
495    {
496        $id = count($this->memo);
497        $this->memo[$id] = $value;
498        $this->writePut($id);
499    }
500
501    /**
502     * Search a value in the memo and return  the id
503     *
504     * @param  mixed $value
505     * @return int|bool The id or false
506     */
507    protected function searchMemo($value)
508    {
509        return array_search($value, $this->memo, true);
510    }
511
512    /**
513     * Quote/Escape a string
514     *
515     * @param  string $str
516     * @return string quoted string
517     */
518    protected function quoteString($str)
519    {
520        $quoteArr = static::$quoteString;
521
522        if (($cntSingleQuote = substr_count($str, "'"))
523            && ($cntDoubleQuote = substr_count($str, '"'))
524            && ($cntSingleQuote < $cntDoubleQuote)
525        ) {
526            $quoteArr['"'] = '\\"';
527            $enclosure     = '"';
528        } else {
529            $quoteArr["'"] = "\\'";
530            $enclosure     = "'";
531        }
532
533        return $enclosure . strtr($str, $quoteArr) . $enclosure;
534    }
535
536    /* unserialize */
537
538    /**
539     * Unserialize from Python Pickle format to PHP
540     *
541     * @param  string $pickle
542     * @return mixed
543     * @throws Exception\RuntimeException on invalid Pickle string
544     */
545    public function unserialize($pickle)
546    {
547        // init process vars
548        $this->clearProcessVars();
549        $this->pickle    = $pickle;
550        $this->pickleLen = strlen($this->pickle);
551
552        // read pickle string
553        while (($op = $this->read(1)) !== self::OP_STOP) {
554            $this->load($op);
555        }
556
557        if (!count($this->stack)) {
558            throw new Exception\RuntimeException('No data found');
559        }
560
561        $ret = array_pop($this->stack);
562
563        // clear process vars
564        $this->clearProcessVars();
565
566        return $ret;
567    }
568
569    /**
570     * Clear temp variables needed for processing
571     */
572    protected function clearProcessVars()
573    {
574        $this->pos       = 0;
575        $this->pickle    = '';
576        $this->pickleLen = 0;
577        $this->memo      = array();
578        $this->stack     = array();
579    }
580
581    /**
582     * Load a pickle opcode
583     *
584     * @param  string $op
585     * @throws Exception\RuntimeException on invalid opcode
586     */
587    protected function load($op)
588    {
589        switch ($op) {
590            case self::OP_PUT:
591                $this->loadPut();
592                break;
593            case self::OP_BINPUT:
594                $this->loadBinPut();
595                break;
596            case self::OP_LONG_BINPUT:
597                $this->loadLongBinPut();
598                break;
599            case self::OP_GET:
600                $this->loadGet();
601                break;
602            case self::OP_BINGET:
603                $this->loadBinGet();
604                break;
605            case self::OP_LONG_BINGET:
606                $this->loadLongBinGet();
607                break;
608            case self::OP_NONE:
609                $this->loadNone();
610                break;
611            case self::OP_NEWTRUE:
612                $this->loadNewTrue();
613                break;
614            case self::OP_NEWFALSE:
615                $this->loadNewFalse();
616                break;
617            case self::OP_INT:
618                $this->loadInt();
619                break;
620            case self::OP_BININT:
621                $this->loadBinInt();
622                break;
623            case self::OP_BININT1:
624                $this->loadBinInt1();
625                break;
626            case self::OP_BININT2:
627                $this->loadBinInt2();
628                break;
629            case self::OP_LONG:
630                $this->loadLong();
631                break;
632            case self::OP_LONG1:
633                $this->loadLong1();
634                break;
635            case self::OP_LONG4:
636                $this->loadLong4();
637                break;
638            case self::OP_FLOAT:
639                $this->loadFloat();
640                break;
641            case self::OP_BINFLOAT:
642                $this->loadBinFloat();
643                break;
644            case self::OP_STRING:
645                $this->loadString();
646                break;
647            case self::OP_BINSTRING:
648                $this->loadBinString();
649                break;
650            case self::OP_SHORT_BINSTRING:
651                $this->loadShortBinString();
652                break;
653            case self::OP_BINBYTES:
654                $this->loadBinBytes();
655                break;
656            case self::OP_SHORT_BINBYTES:
657                $this->loadShortBinBytes();
658                break;
659            case self::OP_UNICODE:
660                $this->loadUnicode();
661                break;
662            case self::OP_BINUNICODE:
663                $this->loadBinUnicode();
664                break;
665            case self::OP_MARK:
666                $this->loadMark();
667                break;
668            case self::OP_LIST:
669                $this->loadList();
670                break;
671            case self::OP_EMPTY_LIST:
672                $this->loadEmptyList();
673                break;
674            case self::OP_APPEND:
675                $this->loadAppend();
676                break;
677            case self::OP_APPENDS:
678                $this->loadAppends();
679                break;
680            case self::OP_DICT:
681                $this->loadDict();
682                break;
683            case self::OP_EMPTY_DICT:
684                $this->_loadEmptyDict();
685                break;
686            case self::OP_SETITEM:
687                $this->loadSetItem();
688                break;
689            case self::OP_SETITEMS:
690                $this->loadSetItems();
691                break;
692            case self::OP_TUPLE:
693                $this->loadTuple();
694                break;
695            case self::OP_TUPLE1:
696                $this->loadTuple1();
697                break;
698            case self::OP_TUPLE2:
699                $this->loadTuple2();
700                break;
701            case self::OP_TUPLE3:
702                $this->loadTuple3();
703                break;
704            case self::OP_PROTO:
705                $this->loadProto();
706                break;
707            default:
708                throw new Exception\RuntimeException("Invalid or unknown opcode '{$op}'");
709        }
710    }
711
712    /**
713     * Load a PUT opcode
714     *
715     * @throws Exception\RuntimeException on missing stack
716     */
717    protected function loadPut()
718    {
719        $id = (int) $this->readline();
720
721        $lastStack = count($this->stack) - 1;
722        if (!isset($this->stack[$lastStack])) {
723            throw new Exception\RuntimeException('No stack exist');
724        }
725        $this->memo[$id] =& $this->stack[$lastStack];
726    }
727
728    /**
729     * Load a binary PUT
730     *
731     * @throws Exception\RuntimeException on missing stack
732     */
733    protected function loadBinPut()
734    {
735        $id = ord($this->read(1));
736
737        $lastStack = count($this->stack)-1;
738        if (!isset($this->stack[$lastStack])) {
739            throw new Exception\RuntimeException('No stack exist');
740        }
741        $this->memo[$id] =& $this->stack[$lastStack];
742    }
743
744    /**
745     * Load a long binary PUT
746     *
747     * @throws Exception\RuntimeException on missing stack
748     */
749    protected function loadLongBinPut()
750    {
751        $bin = $this->read(4);
752        if (static::$isLittleEndian === false) {
753            $bin = strrev($bin);
754        }
755        list(, $id) = unpack('l', $bin);
756
757        $lastStack = count($this->stack)-1;
758        if (!isset($this->stack[$lastStack])) {
759            throw new Exception\RuntimeException('No stack exist');
760        }
761        $this->memo[$id] =& $this->stack[$lastStack];
762    }
763
764    /**
765     * Load a GET operation
766     *
767     * @throws Exception\RuntimeException on missing GET identifier
768     */
769    protected function loadGet()
770    {
771        $id = (int) $this->readline();
772
773        if (!array_key_exists($id, $this->memo)) {
774            throw new Exception\RuntimeException('Get id "' . $id . '" not found in memo');
775        }
776        $this->stack[] =& $this->memo[$id];
777    }
778
779    /**
780     * Load a binary GET operation
781     *
782     * @throws Exception\RuntimeException on missing GET identifier
783     */
784    protected function loadBinGet()
785    {
786        $id = ord($this->read(1));
787
788        if (!array_key_exists($id, $this->memo)) {
789            throw new Exception\RuntimeException('Get id "' . $id . '" not found in memo');
790        }
791        $this->stack[] =& $this->memo[$id];
792    }
793
794    /**
795     * Load a long binary GET operation
796     *
797     * @throws Exception\RuntimeException on missing GET identifier
798     */
799    protected function loadLongBinGet()
800    {
801        $bin = $this->read(4);
802        if (static::$isLittleEndian === false) {
803            $bin = strrev($bin);
804        }
805        list(, $id) = unpack('l', $bin);
806
807        if (!array_key_exists($id, $this->memo)) {
808            throw new Exception\RuntimeException('Get id "' . $id . '" not found in memo');
809        }
810        $this->stack[] =& $this->memo[$id];
811    }
812
813    /**
814     * Load a NONE operator
815     */
816    protected function loadNone()
817    {
818        $this->stack[] = null;
819    }
820
821    /**
822     * Load a boolean TRUE operator
823     */
824    protected function loadNewTrue()
825    {
826        $this->stack[] = true;
827    }
828
829    /**
830     * Load a boolean FALSE operator
831     */
832    protected function loadNewFalse()
833    {
834        $this->stack[] = false;
835    }
836
837    /**
838     * Load an integer operator
839     */
840    protected function loadInt()
841    {
842        $line = $this->readline();
843        if ($line === '01') {
844            $this->stack[] = true;
845        } elseif ($line === '00') {
846            $this->stack[] = false;
847        } else {
848            $this->stack[] = (int) $line;
849        }
850    }
851
852    /**
853     * Load a binary integer operator
854     */
855    protected function loadBinInt()
856    {
857        $bin = $this->read(4);
858        if (static::$isLittleEndian === false) {
859            $bin = strrev($bin);
860        }
861        list(, $int)   = unpack('l', $bin);
862        $this->stack[] = $int;
863    }
864
865    /**
866     * Load the first byte of a binary integer
867     */
868    protected function loadBinInt1()
869    {
870        $this->stack[] = ord($this->read(1));
871    }
872
873    /**
874     * Load the second byte of a binary integer
875     */
876    protected function loadBinInt2()
877    {
878        $bin = $this->read(2);
879        list(, $int)   = unpack('v', $bin);
880        $this->stack[] = $int;
881    }
882
883    /**
884     * Load a long (float) operator
885     */
886    protected function loadLong()
887    {
888        $data = rtrim($this->readline(), 'L');
889        if ($data === '') {
890            $this->stack[] = 0;
891        } else {
892            $this->stack[] = $data;
893        }
894    }
895
896    /**
897     * Load a one byte long integer
898     */
899    protected function loadLong1()
900    {
901        $n    = ord($this->read(1));
902        $data = $this->read($n);
903        $this->stack[] = $this->decodeBinLong($data);
904    }
905
906    /**
907     * Load a 4 byte long integer
908     *
909     */
910    protected function loadLong4()
911    {
912        $nBin = $this->read(4);
913        if (static::$isLittleEndian === false) {
914            $nBin = strrev($$nBin);
915        }
916        list(, $n) = unpack('l', $nBin);
917        $data = $this->read($n);
918
919        $this->stack[] = $this->decodeBinLong($data);
920    }
921
922    /**
923     * Load a float value
924     *
925     */
926    protected function loadFloat()
927    {
928        $float = (float) $this->readline();
929        $this->stack[] = $float;
930    }
931
932    /**
933     * Load a binary float value
934     *
935     */
936    protected function loadBinFloat()
937    {
938        $bin = $this->read(8);
939        if (static::$isLittleEndian === true) {
940            $bin = strrev($bin);
941        }
942        list(, $float) = unpack('d', $bin);
943        $this->stack[] = $float;
944    }
945
946    /**
947     * Load a string
948     *
949     */
950    protected function loadString()
951    {
952        $this->stack[] = $this->unquoteString((string) $this->readline());
953    }
954
955    /**
956     * Load a binary string
957     *
958     */
959    protected function loadBinString()
960    {
961        $bin = $this->read(4);
962        if (!static::$isLittleEndian) {
963            $bin = strrev($bin);
964        }
965        list(, $len)   = unpack('l', $bin);
966        $this->stack[] = (string) $this->read($len);
967    }
968
969    /**
970     * Load a short binary string
971     *
972     */
973    protected function loadShortBinString()
974    {
975        $len           = ord($this->read(1));
976        $this->stack[] = (string) $this->read($len);
977    }
978
979    /**
980     * Load arbitrary binary bytes
981     */
982    protected function loadBinBytes()
983    {
984        // read byte length
985        $nBin = $this->read(4);
986        if (static::$isLittleEndian === false) {
987            $nBin = strrev($$nBin);
988        }
989        list(, $n)     = unpack('l', $nBin);
990        $this->stack[] = $this->read($n);
991    }
992
993    /**
994     * Load a single binary byte
995     */
996    protected function loadShortBinBytes()
997    {
998        $n             = ord($this->read(1));
999        $this->stack[] = $this->read($n);
1000    }
1001
1002    /**
1003     * Load a unicode string
1004     */
1005    protected function loadUnicode()
1006    {
1007        $data    = $this->readline();
1008        $pattern = '/\\\\u([a-fA-F0-9]{4})/u'; // \uXXXX
1009        $data    = preg_replace_callback($pattern, array($this, '_convertMatchingUnicodeSequence2Utf8'), $data);
1010
1011        $this->stack[] = $data;
1012    }
1013
1014    /**
1015     * Convert a unicode sequence to UTF-8
1016     *
1017     * @param  array $match
1018     * @return string
1019     */
1020    protected function _convertMatchingUnicodeSequence2Utf8(array $match)
1021    {
1022        return $this->hex2Utf8($match[1]);
1023    }
1024
1025    /**
1026     * Convert a hex string to a UTF-8 string
1027     *
1028     * @param  string $hex
1029     * @return string
1030     * @throws Exception\RuntimeException on unmatched unicode sequence
1031     */
1032    protected function hex2Utf8($hex)
1033    {
1034        $uniCode = hexdec($hex);
1035
1036        if ($uniCode < 0x80) { // 1Byte
1037            $utf8Char = chr($uniCode);
1038        } elseif ($uniCode < 0x800) { // 2Byte
1039            $utf8Char = chr(0xC0 | $uniCode >> 6)
1040                      . chr(0x80 | $uniCode & 0x3F);
1041        } elseif ($uniCode < 0x10000) { // 3Byte
1042            $utf8Char = chr(0xE0 | $uniCode >> 12)
1043                      . chr(0x80 | $uniCode >> 6 & 0x3F)
1044                      . chr(0x80 | $uniCode & 0x3F);
1045        } elseif ($uniCode < 0x110000) { // 4Byte
1046            $utf8Char  = chr(0xF0 | $uniCode >> 18)
1047                       . chr(0x80 | $uniCode >> 12 & 0x3F)
1048                       . chr(0x80 | $uniCode >> 6 & 0x3F)
1049                       . chr(0x80 | $uniCode & 0x3F);
1050        } else {
1051            throw new Exception\RuntimeException(
1052                sprintf('Unsupported unicode character found "%s"', dechex($uniCode))
1053            );
1054        }
1055
1056        return $utf8Char;
1057    }
1058
1059    /**
1060     * Load binary unicode sequence
1061     */
1062    protected function loadBinUnicode()
1063    {
1064        // read byte length
1065        $n = $this->read(4);
1066        if (static::$isLittleEndian === false) {
1067            $n = strrev($n);
1068        }
1069        list(, $n) = unpack('l', $n);
1070        $data      = $this->read($n);
1071
1072        $this->stack[] = $data;
1073    }
1074
1075    /**
1076     * Load a marker sequence
1077     */
1078    protected function loadMark()
1079    {
1080        $this->stack[] = $this->marker;
1081    }
1082
1083    /**
1084     * Load an array (list)
1085     */
1086    protected function loadList()
1087    {
1088        $k = $this->lastMarker();
1089        $this->stack[$k] = array();
1090
1091        // remove all elements after marker
1092        for ($i = $k + 1, $max = count($this->stack); $i < $max; $i++) {
1093            unset($this->stack[$i]);
1094        }
1095    }
1096
1097    /**
1098     * Load an append (to list) sequence
1099     */
1100    protected function loadAppend()
1101    {
1102        $value  =  array_pop($this->stack);
1103        $list   =& $this->stack[count($this->stack) - 1];
1104        $list[] =  $value;
1105    }
1106
1107    /**
1108     * Load an empty list sequence
1109     */
1110    protected function loadEmptyList()
1111    {
1112        $this->stack[] = array();
1113    }
1114
1115    /**
1116     * Load multiple append (to list) sequences at once
1117     */
1118    protected function loadAppends()
1119    {
1120        $k    =  $this->lastMarker();
1121        $list =& $this->stack[$k - 1];
1122        $max  =  count($this->stack);
1123        for ($i = $k + 1; $i < $max; $i++) {
1124            $list[] = $this->stack[$i];
1125            unset($this->stack[$i]);
1126        }
1127        unset($this->stack[$k]);
1128    }
1129
1130    /**
1131     * Load an associative array (Python dictionary)
1132     */
1133    protected function loadDict()
1134    {
1135        $k = $this->lastMarker();
1136        $this->stack[$k] = array();
1137
1138        // remove all elements after marker
1139        $max = count($this->stack);
1140        for ($i = $k + 1; $i < $max; $i++) {
1141            unset($this->stack[$i]);
1142        }
1143    }
1144
1145    /**
1146     * Load an item from a set
1147     */
1148    protected function loadSetItem()
1149    {
1150        $value =  array_pop($this->stack);
1151        $key   =  array_pop($this->stack);
1152        $dict  =& $this->stack[count($this->stack) - 1];
1153        $dict[$key] = $value;
1154    }
1155
1156    /**
1157     * Load an empty dictionary
1158     */
1159    protected function _loadEmptyDict()
1160    {
1161        $this->stack[] = array();
1162    }
1163
1164    /**
1165     * Load set items
1166     */
1167    protected function loadSetItems()
1168    {
1169        $k    =  $this->lastMarker();
1170        $dict =& $this->stack[$k - 1];
1171        $max  =  count($this->stack);
1172        for ($i = $k + 1; $i < $max; $i += 2) {
1173            $key        = $this->stack[$i];
1174            $value      = $this->stack[$i + 1];
1175            $dict[$key] = $value;
1176            unset($this->stack[$i], $this->stack[$i+1]);
1177        }
1178        unset($this->stack[$k]);
1179    }
1180
1181    /**
1182     * Load a tuple
1183     */
1184    protected function loadTuple()
1185    {
1186        $k                =  $this->lastMarker();
1187        $this->stack[$k]  =  array();
1188        $tuple            =& $this->stack[$k];
1189        $max              =  count($this->stack);
1190        for ($i = $k + 1; $i < $max; $i++) {
1191            $tuple[] = $this->stack[$i];
1192            unset($this->stack[$i]);
1193        }
1194    }
1195
1196    /**
1197     * Load single item tuple
1198     */
1199    protected function loadTuple1()
1200    {
1201        $value1        = array_pop($this->stack);
1202        $this->stack[] = array($value1);
1203    }
1204
1205    /**
1206     * Load two item tuple
1207     *
1208     */
1209    protected function loadTuple2()
1210    {
1211        $value2 = array_pop($this->stack);
1212        $value1 = array_pop($this->stack);
1213        $this->stack[] = array($value1, $value2);
1214    }
1215
1216    /**
1217     * Load three item tuple
1218     *
1219     */
1220    protected function loadTuple3()
1221    {
1222        $value3 = array_pop($this->stack);
1223        $value2 = array_pop($this->stack);
1224        $value1 = array_pop($this->stack);
1225        $this->stack[] = array($value1, $value2, $value3);
1226    }
1227
1228    /**
1229     * Load a proto value
1230     *
1231     * @throws Exception\RuntimeException if Pickle version does not support this feature
1232     */
1233    protected function loadProto()
1234    {
1235        $proto = ord($this->read(1));
1236        if ($proto < 2 || $proto > 3) {
1237            throw new Exception\RuntimeException(
1238                "Invalid or unknown protocol version '{$proto}' detected"
1239            );
1240        }
1241        $this->protocol = $proto;
1242    }
1243
1244    /* unserialize helper */
1245
1246    /**
1247     * Read a segment of the pickle
1248     *
1249     * @param  mixed $len
1250     * @return string
1251     * @throws Exception\RuntimeException if position matches end of data
1252     */
1253    protected function read($len)
1254    {
1255        if (($this->pos + $len) > $this->pickleLen) {
1256            throw new Exception\RuntimeException('End of data');
1257        }
1258
1259        $this->pos += $len;
1260        return substr($this->pickle, ($this->pos - $len), $len);
1261    }
1262
1263    /**
1264     * Read a line of the pickle at once
1265     *
1266     * @return string
1267     * @throws Exception\RuntimeException if no EOL character found
1268     */
1269    protected function readline()
1270    {
1271        $eolLen = 2;
1272        $eolPos = strpos($this->pickle, "\r\n", $this->pos);
1273        if ($eolPos === false) {
1274            $eolPos = strpos($this->pickle, "\n", $this->pos);
1275            $eolLen = 1;
1276        }
1277
1278        if ($eolPos === false) {
1279            throw new Exception\RuntimeException('No new line found');
1280        }
1281        $ret       = substr($this->pickle, $this->pos, $eolPos-$this->pos);
1282        $this->pos = $eolPos + $eolLen;
1283
1284        return $ret;
1285    }
1286
1287    /**
1288     * Unquote/Unescape a quoted string
1289     *
1290     * @param  string $str quoted string
1291     * @return string unquoted string
1292     */
1293    protected function unquoteString($str)
1294    {
1295        $quoteArr = array_flip(static::$quoteString);
1296
1297        if ($str[0] == '"') {
1298            $quoteArr['\\"'] = '"';
1299        } else {
1300            $quoteArr["\\'"] = "'";
1301        }
1302
1303        return strtr(substr(trim($str), 1, -1), $quoteArr);
1304    }
1305
1306    /**
1307     * Return last marker position in stack
1308     *
1309     * @return int
1310     */
1311    protected function lastMarker()
1312    {
1313        for ($k = count($this->stack)-1; $k >= 0; $k -= 1) {
1314            if ($this->stack[$k] === $this->marker) {
1315                break;
1316            }
1317        }
1318        return $k;
1319    }
1320
1321    /**
1322     * Decode a binary long sequence
1323     *
1324     * @param  string $data
1325     * @return int|float|string
1326     */
1327    protected function decodeBinLong($data)
1328    {
1329        $nbytes = strlen($data);
1330
1331        if ($nbytes == 0) {
1332            return 0;
1333        }
1334
1335        $long = 0;
1336        if ($nbytes > 7) {
1337            if ($this->bigIntegerAdapter === null) {
1338                $this->bigIntegerAdapter = BigInteger\BigInteger::getDefaultAdapter();
1339            }
1340            if (static::$isLittleEndian === true) {
1341                $data = strrev($data);
1342            }
1343            $long = $this->bigIntegerAdapter->binToInt($data, true);
1344        } else {
1345            for ($i = 0; $i < $nbytes; $i++) {
1346                $long += ord($data[$i]) * pow(256, $i);
1347            }
1348            if (0x80 <= ord($data[$nbytes - 1])) {
1349                $long -= pow(2, $nbytes * 8);
1350                // $long-= 1 << ($nbytes * 8);
1351            }
1352        }
1353
1354        return $long;
1355    }
1356}
1357