1"""Create portable serialized representations of Python objects.
2
3See module copyreg for a mechanism for registering custom picklers.
4See module pickletools source for extensive comments.
5
6Classes:
7
8    Pickler
9    Unpickler
10
11Functions:
12
13    dump(object, file)
14    dumps(object) -> string
15    load(file) -> object
16    loads(bytes) -> object
17
18Misc variables:
19
20    __version__
21    format_version
22    compatible_formats
23
24"""
25
26from types import FunctionType
27from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
29from itertools import islice
30from functools import partial
31import sys
32from sys import maxsize
33from struct import pack, unpack
34import re
35import io
36import codecs
37import _compat_pickle
38
# Public API of this module.  PickleBuffer is appended below only when the
# C accelerator module provides it.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# PickleBuffer (protocol 5 out-of-band buffer support) only exists in the
# C _pickle module; record whether it is available so protocol-5-specific
# code paths can be guarded on _HAVE_PICKLE_BUFFER.
try:
    from _pickle import PickleBuffer
    __all__.append("PickleBuffer")
    _HAVE_PICKLE_BUFFER = True
except ImportError:
    _HAVE_PICKLE_BUFFER = False
49
# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4
72
class PickleError(Exception):
    """Common base class for all pickling-related exceptions."""
76
class PicklingError(PickleError):
    """Raised when an unpicklable object is passed to the dump() method."""
83
class UnpicklingError(PickleError):
    """Raised when there is a problem unpickling an object, such as a
    security violation.

    Note that other exceptions may also be raised during unpickling,
    including (but not necessarily limited to) AttributeError, EOFError,
    ImportError, and IndexError.
    """
94
95# An instance of _Stop is raised by Unpickler.load_stop() in response to
96# the STOP opcode, passing the object that is the result of unpickling.
97class _Stop(Exception):
98    def __init__(self, value):
99        self.value = value
100
# Jython has PyStringMap; it's a dict subclass with string keys.
# On CPython the import fails and PyStringMap stays None, which the
# dispatch code treats as "type not present on this platform".
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
106
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.
# Each value is a one-byte bytes object; the comments give the opcode's
# stack effect and argument format.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Lookup table: tuple length (0-3) -> dedicated tuple-building opcode.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Export every all-caps opcode name defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
198
199
200class _Framer:
201
202    _FRAME_SIZE_MIN = 4
203    _FRAME_SIZE_TARGET = 64 * 1024
204
205    def __init__(self, file_write):
206        self.file_write = file_write
207        self.current_frame = None
208
209    def start_framing(self):
210        self.current_frame = io.BytesIO()
211
212    def end_framing(self):
213        if self.current_frame and self.current_frame.tell() > 0:
214            self.commit_frame(force=True)
215            self.current_frame = None
216
217    def commit_frame(self, force=False):
218        if self.current_frame:
219            f = self.current_frame
220            if f.tell() >= self._FRAME_SIZE_TARGET or force:
221                data = f.getbuffer()
222                write = self.file_write
223                if len(data) >= self._FRAME_SIZE_MIN:
224                    # Issue a single call to the write method of the underlying
225                    # file object for the frame opcode with the size of the
226                    # frame. The concatenation is expected to be less expensive
227                    # than issuing an additional call to write.
228                    write(FRAME + pack("<Q", len(data)))
229
230                # Issue a separate call to write to append the frame
231                # contents without concatenation to the above to avoid a
232                # memory copy.
233                write(data)
234
235                # Start the new frame with a new io.BytesIO instance so that
236                # the file object can have delayed access to the previous frame
237                # contents via an unreleased memoryview of the previous
238                # io.BytesIO instance.
239                self.current_frame = io.BytesIO()
240
241    def write(self, data):
242        if self.current_frame:
243            return self.current_frame.write(data)
244        else:
245            return self.file_write(data)
246
247    def write_large_bytes(self, header, payload):
248        write = self.file_write
249        if self.current_frame:
250            # Terminate the current frame and flush it to the file.
251            self.commit_frame(force=True)
252
253        # Perform direct write of the header and payload of the large binary
254        # object. Be careful not to concatenate the header and the payload
255        # prior to calling 'write' as we do not want to allocate a large
256        # temporary bytes object.
257        # We intentionally do not insert a protocol 4 frame opcode to make
258        # it possible to optimize file.read calls in the loader.
259        write(header)
260        write(payload)
261
262
263class _Unframer:
264
265    def __init__(self, file_read, file_readline, file_tell=None):
266        self.file_read = file_read
267        self.file_readline = file_readline
268        self.current_frame = None
269
270    def readinto(self, buf):
271        if self.current_frame:
272            n = self.current_frame.readinto(buf)
273            if n == 0 and len(buf) != 0:
274                self.current_frame = None
275                n = len(buf)
276                buf[:] = self.file_read(n)
277                return n
278            if n < len(buf):
279                raise UnpicklingError(
280                    "pickle exhausted before end of frame")
281            return n
282        else:
283            n = len(buf)
284            buf[:] = self.file_read(n)
285            return n
286
287    def read(self, n):
288        if self.current_frame:
289            data = self.current_frame.read(n)
290            if not data and n != 0:
291                self.current_frame = None
292                return self.file_read(n)
293            if len(data) < n:
294                raise UnpicklingError(
295                    "pickle exhausted before end of frame")
296            return data
297        else:
298            return self.file_read(n)
299
300    def readline(self):
301        if self.current_frame:
302            data = self.current_frame.readline()
303            if not data:
304                self.current_frame = None
305                return self.file_readline()
306            if data[-1] != b'\n'[0]:
307                raise UnpicklingError(
308                    "pickle exhausted before end of frame")
309            return data
310        else:
311            return self.file_readline()
312
313    def load_frame(self, frame_size):
314        if self.current_frame and self.current_frame.read() != b'':
315            raise UnpicklingError(
316                "beginning of a new frame before end of current frame")
317        self.current_frame = io.BytesIO(self.file_read(frame_size))
318
319
320# Tools used for pickling.
321
322def _getattribute(obj, name):
323    for subpath in name.split('.'):
324        if subpath == '<locals>':
325            raise AttributeError("Can't get local attribute {!r} on {!r}"
326                                 .format(name, obj))
327        try:
328            parent = obj
329            obj = getattr(obj, subpath)
330        except AttributeError:
331            raise AttributeError("Can't get attribute {!r} on {!r}"
332                                 .format(name, obj)) from None
333    return obj, parent
334
def whichmodule(obj, name):
    """Find the name of the module an object belongs to."""
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Scan a snapshot of sys.modules: dynamic modules may trigger imports
    # of other modules (mutating sys.modules) when getattr is called on
    # them, so never iterate the live dict.
    for module_name, module in sys.modules.copy().items():
        if (module is None
                or module_name == '__main__'
                or module_name == '__mp_main__'):  # bpo-42406
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'
353
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.

    Zero is special-cased to the empty string, which saves a byte in the
    LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    # One byte more than the minimum so the sign bit always fits.
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    # Negative values may end up with a redundant 0xff sign byte at the
    # top; drop it when the preceding byte already carries the sign.
    if x < 0 and nbytes > 1 and result[-1] == 0xff and result[-2] & 0x80:
        result = result[:-1]
    return result
383
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    The empty string decodes to 0 (the inverse of encode_long's
    special case).

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)
403
404
405# Pickling machinery
406
407class _Pickler:
408
409    def __init__(self, file, protocol=None, *, fix_imports=True,
410                 buffer_callback=None):
411        """This takes a binary file for writing a pickle data stream.
412
413        The optional *protocol* argument tells the pickler to use the
414        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
415        The default protocol is 4. It was introduced in Python 3.4, and
416        is incompatible with previous versions.
417
418        Specifying a negative protocol version selects the highest
419        protocol version supported.  The higher the protocol used, the
420        more recent the version of Python needed to read the pickle
421        produced.
422
423        The *file* argument must have a write() method that accepts a
424        single bytes argument. It can thus be a file object opened for
425        binary writing, an io.BytesIO instance, or any other custom
426        object that meets this interface.
427
428        If *fix_imports* is True and *protocol* is less than 3, pickle
429        will try to map the new Python 3 names to the old module names
430        used in Python 2, so that the pickle data stream is readable
431        with Python 2.
432
433        If *buffer_callback* is None (the default), buffer views are
434        serialized into *file* as part of the pickle stream.
435
436        If *buffer_callback* is not None, then it can be called any number
437        of times with a buffer view.  If the callback returns a false value
438        (such as None), the given buffer is out-of-band; otherwise the
439        buffer is serialized in-band, i.e. inside the pickle stream.
440
441        It is an error if *buffer_callback* is not None and *protocol*
442        is None or smaller than 5.
443        """
444        if protocol is None:
445            protocol = DEFAULT_PROTOCOL
446        if protocol < 0:
447            protocol = HIGHEST_PROTOCOL
448        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
449            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
450        if buffer_callback is not None and protocol < 5:
451            raise ValueError("buffer_callback needs protocol >= 5")
452        self._buffer_callback = buffer_callback
453        try:
454            self._file_write = file.write
455        except AttributeError:
456            raise TypeError("file must have a 'write' attribute")
457        self.framer = _Framer(self._file_write)
458        self.write = self.framer.write
459        self._write_large_bytes = self.framer.write_large_bytes
460        self.memo = {}
461        self.proto = int(protocol)
462        self.bin = protocol >= 1
463        self.fast = 0
464        self.fix_imports = fix_imports and protocol < 3
465
466    def clear_memo(self):
467        """Clears the pickler's "memo".
468
469        The memo is the data structure that remembers which objects the
470        pickler has already seen, so that shared or recursive objects
471        are pickled by reference and not by value.  This method is
472        useful when re-using picklers.
473        """
474        self.memo.clear()
475
476    def dump(self, obj):
477        """Write a pickled representation of obj to the open file."""
478        # Check whether Pickler was initialized correctly. This is
479        # only needed to mimic the behavior of _pickle.Pickler.dump().
480        if not hasattr(self, "_file_write"):
481            raise PicklingError("Pickler.__init__() was not called by "
482                                "%s.__init__()" % (self.__class__.__name__,))
483        if self.proto >= 2:
484            self.write(PROTO + pack("<B", self.proto))
485        if self.proto >= 4:
486            self.framer.start_framing()
487        self.save(obj)
488        self.write(STOP)
489        self.framer.end_framing()
490
491    def memoize(self, obj):
492        """Store an object in the memo."""
493
494        # The Pickler memo is a dictionary mapping object ids to 2-tuples
495        # that contain the Unpickler memo key and the object being memoized.
496        # The memo key is written to the pickle and will become
497        # the key in the Unpickler's memo.  The object is stored in the
498        # Pickler memo so that transient objects are kept alive during
499        # pickling.
500
501        # The use of the Unpickler memo length as the memo key is just a
502        # convention.  The only requirement is that the memo values be unique.
503        # But there appears no advantage to any other scheme, and this
504        # scheme allows the Unpickler memo to be implemented as a plain (but
505        # growable) array, indexed by memo key.
506        if self.fast:
507            return
508        assert id(obj) not in self.memo
509        idx = len(self.memo)
510        self.write(self.put(idx))
511        self.memo[id(obj)] = idx, obj
512
513    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
514    def put(self, idx):
515        if self.proto >= 4:
516            return MEMOIZE
517        elif self.bin:
518            if idx < 256:
519                return BINPUT + pack("<B", idx)
520            else:
521                return LONG_BINPUT + pack("<I", idx)
522        else:
523            return PUT + repr(idx).encode("ascii") + b'\n'
524
525    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
526    def get(self, i):
527        if self.bin:
528            if i < 256:
529                return BINGET + pack("<B", i)
530            else:
531                return LONG_BINGET + pack("<I", i)
532
533        return GET + repr(i).encode("ascii") + b'\n'
534
    def save(self, obj, save_persistent_id=True):
        """Write the pickled representation of obj to the stream.

        This is the central dispatcher.  It tries, in order: a persistent
        id, the memo, reducer_override() (if defined by a subclass), the
        per-type dispatch table, a private or copyreg dispatch table, and
        finally the object's own __reduce_ex__/__reduce__ protocol.
        """
        # Give the framer a chance to flush/start a protocol-4 frame.
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo: an already-pickled object is emitted as a GET.
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                if reduce is not None:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", None)
                    if reduce is not None:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)
604
605    def persistent_id(self, obj):
606        # This exists so a subclass can override it
607        return None
608
609    def save_pers(self, pid):
610        # Save a persistent id reference
611        if self.bin:
612            self.save(pid, save_persistent_id=False)
613            self.write(BINPERSID)
614        else:
615            try:
616                self.write(PERSID + str(pid).encode("ascii") + b'\n')
617            except UnicodeEncodeError:
618                raise PicklingError(
619                    "persistent IDs in protocol 0 must be ASCII strings")
620
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, obj=None):
        """Pickle the contents of a 2-to-6 element reduce() tuple.

        *func* and *args* describe a callable and argument tuple that
        rebuild the object at load time; *state*, *listitems*,
        *dictitems* and *state_setter* are the optional remaining items.
        *obj* is the original object, used for memoization and sanity
        checks.  This API is also called directly by some subclasses.
        """
        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                # Protocol 4 has a dedicated opcode for keyword-argument
                # construction.
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Protocols 2-3: emulate NEWOBJ_EX by pickling a partial
                # application of cls.__new__ and REDUCE-ing it.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            # Generic case: pickle func and args, then apply with REDUCE.
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            if state_setter is None:
                save(state)
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                save(state_setter)
                save(obj)  # simple BINGET opcode as obj is already memoized.
                save(state)
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj. We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)
735
    # Methods below this point are dispatched through the dispatch table

    # Maps a concrete type to the save_* method that serializes it;
    # populated by the `dispatch[T] = save_T` assignments that follow.
    dispatch = {}
739
740    def save_none(self, obj):
741        self.write(NONE)
742    dispatch[type(None)] = save_none
743
744    def save_bool(self, obj):
745        if self.proto >= 2:
746            self.write(NEWTRUE if obj else NEWFALSE)
747        else:
748            self.write(TRUE if obj else FALSE)
749    dispatch[bool] = save_bool
750
751    def save_long(self, obj):
752        if self.bin:
753            # If the int is small enough to fit in a signed 4-byte 2's-comp
754            # format, we can store it more efficiently than the general
755            # case.
756            # First one- and two-byte unsigned ints:
757            if obj >= 0:
758                if obj <= 0xff:
759                    self.write(BININT1 + pack("<B", obj))
760                    return
761                if obj <= 0xffff:
762                    self.write(BININT2 + pack("<H", obj))
763                    return
764            # Next check for 4-byte signed ints:
765            if -0x80000000 <= obj <= 0x7fffffff:
766                self.write(BININT + pack("<i", obj))
767                return
768        if self.proto >= 2:
769            encoded = encode_long(obj)
770            n = len(encoded)
771            if n < 256:
772                self.write(LONG1 + pack("<B", n) + encoded)
773            else:
774                self.write(LONG4 + pack("<i", n) + encoded)
775            return
776        if -0x80000000 <= obj <= 0x7fffffff:
777            self.write(INT + repr(obj).encode("ascii") + b'\n')
778        else:
779            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
780    dispatch[int] = save_long
781
782    def save_float(self, obj):
783        if self.bin:
784            self.write(BINFLOAT + pack('>d', obj))
785        else:
786            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
787    dispatch[float] = save_float
788
    def save_bytes(self, obj):
        """Pickle a bytes object.

        Protocols < 3 have no bytes opcodes, so fall back to a reduce
        through codecs.encode with a latin-1 round-trip.
        """
        if self.proto < 3:
            if not obj: # bytes object is empty
                self.save_reduce(bytes, (), obj=obj)
            else:
                self.save_reduce(codecs.encode,
                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
            return
        n = len(obj)
        if n <= 0xff:
            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            # Frame-sized payloads bypass the framer's internal buffer.
            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
        else:
            self.write(BINBYTES + pack("<I", n) + obj)
        self.memoize(obj)
    dispatch[bytes] = save_bytes
808
    def save_bytearray(self, obj):
        """Pickle a bytearray.

        BYTEARRAY8 exists only in protocol 5; older protocols reduce
        through the bytearray constructor.
        """
        if self.proto < 5:
            if not obj:  # bytearray is empty
                self.save_reduce(bytearray, (), obj=obj)
            else:
                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
            return
        n = len(obj)
        if n >= self.framer._FRAME_SIZE_TARGET:
            # Frame-sized payloads bypass the framer's internal buffer.
            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
        else:
            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
        self.memoize(obj)
    dispatch[bytearray] = save_bytearray
823
824    if _HAVE_PICKLE_BUFFER:
825        def save_picklebuffer(self, obj):
826            if self.proto < 5:
827                raise PicklingError("PickleBuffer can only pickled with "
828                                    "protocol >= 5")
829            with obj.raw() as m:
830                if not m.contiguous:
831                    raise PicklingError("PickleBuffer can not be pickled when "
832                                        "pointing to a non-contiguous buffer")
833                in_band = True
834                if self._buffer_callback is not None:
835                    in_band = bool(self._buffer_callback(obj))
836                if in_band:
837                    # Write data in-band
838                    # XXX The C implementation avoids a copy here
839                    if m.readonly:
840                        self.save_bytes(m.tobytes())
841                    else:
842                        self.save_bytearray(m.tobytes())
843                else:
844                    # Write data out-of-band
845                    self.write(NEXT_BUFFER)
846                    if m.readonly:
847                        self.write(READONLY_BUFFER)
848
849        dispatch[PickleBuffer] = save_picklebuffer
850
851    def save_str(self, obj):
852        if self.bin:
853            encoded = obj.encode('utf-8', 'surrogatepass')
854            n = len(encoded)
855            if n <= 0xff and self.proto >= 4:
856                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
857            elif n > 0xffffffff and self.proto >= 4:
858                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
859            elif n >= self.framer._FRAME_SIZE_TARGET:
860                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
861            else:
862                self.write(BINUNICODE + pack("<I", n) + encoded)
863        else:
864            obj = obj.replace("\\", "\\u005c")
865            obj = obj.replace("\0", "\\u0000")
866            obj = obj.replace("\n", "\\u000a")
867            obj = obj.replace("\r", "\\u000d")
868            obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
869            self.write(UNICODE + obj.encode('raw-unicode-escape') +
870                       b'\n')
871        self.memoize(obj)
872    dispatch[str] = save_str
873
    def save_tuple(self, obj):
        """Pickle a tuple, handling self-referential (recursive) tuples.

        Small tuples (<= 3 items, proto 2+) use the dedicated TUPLE1-3
        opcodes; otherwise MARK ... TUPLE is used.
        """
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple
924
925    def save_list(self, obj):
926        if self.bin:
927            self.write(EMPTY_LIST)
928        else:   # proto 0 -- can't use EMPTY_LIST
929            self.write(MARK + LIST)
930
931        self.memoize(obj)
932        self._batch_appends(obj)
933
934    dispatch[list] = save_list
935
    # Number of items grouped between one MARK/APPENDS (or SETITEMS) pair.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            # Protocol 0 has no APPENDS; emit one APPEND per element.
            for x in items:
                save(x)
                write(APPEND)
            return

        # Group elements _BATCHSIZE at a time between MARK/APPENDS so the
        # unpickler's stack usage stays bounded.
        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                # A single trailing element: a lone APPEND is shorter.
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return
964
965    def save_dict(self, obj):
966        if self.bin:
967            self.write(EMPTY_DICT)
968        else:   # proto 0 -- can't use EMPTY_DICT
969            self.write(MARK + DICT)
970
971        self.memoize(obj)
972        self._batch_setitems(obj.items())
973
974    dispatch[dict] = save_dict
975    if PyStringMap is not None:
976        dispatch[PyStringMap] = save_dict
977
    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            # Protocol 0 has no SETITEMS; emit one SETITEM per pair.
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        # Group pairs _BATCHSIZE at a time between MARK/SETITEMS so the
        # unpickler's stack usage stays bounded.
        it = iter(items)
        while True:
            tmp = list(islice(it, self._BATCHSIZE))
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                # A single trailing pair: a lone SETITEM is shorter.
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done
            if n < self._BATCHSIZE:
                return
1008
    def save_set(self, obj):
        """Pickle a set.

        EMPTY_SET/ADDITEMS exist only in protocol 4+; older protocols
        reduce through the set constructor.
        """
        save = self.save
        write = self.write

        if self.proto < 4:
            self.save_reduce(set, (list(obj),), obj=obj)
            return

        write(EMPTY_SET)
        # Memoize the (still empty) set so recursive references resolve.
        self.memoize(obj)

        it = iter(obj)
        while True:
            batch = list(islice(it, self._BATCHSIZE))
            n = len(batch)
            if n > 0:
                write(MARK)
                for item in batch:
                    save(item)
                write(ADDITEMS)
            if n < self._BATCHSIZE:
                return
    dispatch[set] = save_set
1032
    def save_frozenset(self, obj):
        """Pickle a frozenset.

        FROZENSET exists only in protocol 4+; older protocols reduce
        through the frozenset constructor.
        """
        save = self.save
        write = self.write

        if self.proto < 4:
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write(MARK)
        for item in obj:
            save(item)

        if id(obj) in self.memo:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset
1055
1056    def save_global(self, obj, name=None):
1057        write = self.write
1058        memo = self.memo
1059
1060        if name is None:
1061            name = getattr(obj, '__qualname__', None)
1062        if name is None:
1063            name = obj.__name__
1064
1065        module_name = whichmodule(obj, name)
1066        try:
1067            __import__(module_name, level=0)
1068            module = sys.modules[module_name]
1069            obj2, parent = _getattribute(module, name)
1070        except (ImportError, KeyError, AttributeError):
1071            raise PicklingError(
1072                "Can't pickle %r: it's not found as %s.%s" %
1073                (obj, module_name, name)) from None
1074        else:
1075            if obj2 is not obj:
1076                raise PicklingError(
1077                    "Can't pickle %r: it's not the same object as %s.%s" %
1078                    (obj, module_name, name))
1079
1080        if self.proto >= 2:
1081            code = _extension_registry.get((module_name, name))
1082            if code:
1083                assert code > 0
1084                if code <= 0xff:
1085                    write(EXT1 + pack("<B", code))
1086                elif code <= 0xffff:
1087                    write(EXT2 + pack("<H", code))
1088                else:
1089                    write(EXT4 + pack("<i", code))
1090                return
1091        lastname = name.rpartition('.')[2]
1092        if parent is module:
1093            name = lastname
1094        # Non-ASCII identifiers are supported only with protocols >= 3.
1095        if self.proto >= 4:
1096            self.save(module_name)
1097            self.save(name)
1098            write(STACK_GLOBAL)
1099        elif parent is not module:
1100            self.save_reduce(getattr, (parent, lastname))
1101        elif self.proto >= 3:
1102            write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
1103                  bytes(name, "utf-8") + b'\n')
1104        else:
1105            if self.fix_imports:
1106                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
1107                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
1108                if (module_name, name) in r_name_mapping:
1109                    module_name, name = r_name_mapping[(module_name, name)]
1110                elif module_name in r_import_mapping:
1111                    module_name = r_import_mapping[module_name]
1112            try:
1113                write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
1114                      bytes(name, "ascii") + b'\n')
1115            except UnicodeEncodeError:
1116                raise PicklingError(
1117                    "can't pickle global identifier '%s.%s' using "
1118                    "pickle protocol %i" % (module, name, self.proto)) from None
1119
1120        self.memoize(obj)
1121
1122    def save_type(self, obj):
1123        if obj is type(None):
1124            return self.save_reduce(type, (None,), obj=obj)
1125        elif obj is type(NotImplemented):
1126            return self.save_reduce(type, (NotImplemented,), obj=obj)
1127        elif obj is type(...):
1128            return self.save_reduce(type, (...,), obj=obj)
1129        return self.save_global(obj)
1130
1131    dispatch[FunctionType] = save_global
1132    dispatch[type] = save_type
1133
1134
1135# Unpickling machinery
1136
1137class _Unpickler:
1138
    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict", buffers=None):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.  Both methods should return bytes.  Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        If *buffers* is not None, it should be an iterable of buffer-enabled
        objects that is consumed each time the pickle stream references
        an out-of-band buffer view.  Such buffers have been given in order
        to the *buffer_callback* of a Pickler object.

        If *buffers* is None (the default), then the buffers are taken
        from the pickle stream, assuming they are serialized there.
        It is an error for *buffers* to be None if the pickle stream
        was produced with a non-None *buffer_callback*.

        Other optional arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2.  If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3.  The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._buffers = iter(buffers) if buffers is not None else None
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}                    # memo index -> loaded object
        self.encoding = encoding
        self.errors = errors
        self.proto = 0                    # updated by the PROTO opcode
        self.fix_imports = fix_imports
1186
    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.load().
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readinto = self._unframer.readinto
        self.readline = self._unframer.readline
        self.metastack = []
        self.stack = []
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Opcode loop: read one opcode byte at a time and dispatch
            # until a STOP opcode raises _Stop with the final value.
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value
1216
1217    # Return a list of items pushed in the stack after last MARK instruction.
1218    def pop_mark(self):
1219        items = self.stack
1220        self.stack = self.metastack.pop()
1221        self.append = self.stack.append
1222        return items
1223
    def persistent_load(self, pid):
        # Default hook for PERSID/BINPERSID; subclasses override this to
        # resolve persistent IDs.
        raise UnpicklingError("unsupported persistent id encountered")

    # Maps an opcode byte value to the bound load_* method handling it.
    dispatch = {}
1228
    def load_proto(self):
        # PROTO: one byte giving the protocol version of the stream.
        proto = self.read(1)[0]
        if not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("unsupported pickle protocol: %d" % proto)
        self.proto = proto
    dispatch[PROTO[0]] = load_proto
1235
    def load_frame(self):
        # FRAME: 8-byte little-endian length of the next framed chunk.
        frame_size, = unpack('<Q', self.read(8))
        if frame_size > sys.maxsize:
            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
        self._unframer.load_frame(frame_size)
    dispatch[FRAME[0]] = load_frame
1242
    def load_persid(self):
        # PERSID: newline-terminated ASCII persistent ID (protocol 0).
        try:
            pid = self.readline()[:-1].decode("ascii")
        except UnicodeDecodeError:
            raise UnpicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid
1251
    def load_binpersid(self):
        # BINPERSID: persistent ID is taken from the stack, not the stream.
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid
1256
    def load_none(self):
        # NONE: push the None singleton.
        self.append(None)
    dispatch[NONE[0]] = load_none
1260
    def load_false(self):
        # NEWFALSE: push False (protocol 2+).
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false
1264
    def load_true(self):
        # NEWTRUE: push True (protocol 2+).
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true
1268
1269    def load_int(self):
1270        data = self.readline()
1271        if data == FALSE[1:]:
1272            val = False
1273        elif data == TRUE[1:]:
1274            val = True
1275        else:
1276            val = int(data, 0)
1277        self.append(val)
1278    dispatch[INT[0]] = load_int
1279
    def load_binint(self):
        # BININT: 4-byte little-endian signed int.
        self.append(unpack('<i', self.read(4))[0])
    dispatch[BININT[0]] = load_binint
1283
    def load_binint1(self):
        # BININT1: 1-byte unsigned int.
        self.append(self.read(1)[0])
    dispatch[BININT1[0]] = load_binint1
1287
    def load_binint2(self):
        # BININT2: 2-byte little-endian unsigned int.
        self.append(unpack('<H', self.read(2))[0])
    dispatch[BININT2[0]] = load_binint2
1291
    def load_long(self):
        # LONG: newline-terminated decimal text, possibly with a trailing
        # 'L' written by Python 2 (or by save_long for protocol 0).
        val = self.readline()[:-1]
        if val and val[-1] == b'L'[0]:
            val = val[:-1]
        self.append(int(val, 0))
    dispatch[LONG[0]] = load_long
1298
    def load_long1(self):
        # LONG1: 1-byte length, then little-endian two's-complement bytes.
        n = self.read(1)[0]
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1
1304
    def load_long4(self):
        # LONG4: 4-byte signed length, then two's-complement bytes.
        n, = unpack('<i', self.read(4))
        if n < 0:
            # Corrupt or hostile pickle -- we never write one like this
            raise UnpicklingError("LONG pickle has negative byte count")
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG4[0]] = load_long4
1313
    def load_float(self):
        # FLOAT: newline-terminated repr() text.
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float
1317
    def load_binfloat(self):
        # BINFLOAT: 8-byte big-endian IEEE-754 double.
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat
1321
1322    def _decode_string(self, value):
1323        # Used to allow strings from Python 2 to be decoded either as
1324        # bytes or Unicode strings.  This should be used only with the
1325        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
1326        if self.encoding == "bytes":
1327            return value
1328        else:
1329            return value.decode(self.encoding, self.errors)
1330
    def load_string(self):
        # STRING: newline-terminated, quoted, escape-encoded Python 2 str.
        data = self.readline()[:-1]
        # Strip outermost quotes
        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
            data = data[1:-1]
        else:
            raise UnpicklingError("the STRING opcode argument must be quoted")
        self.append(self._decode_string(codecs.escape_decode(data)[0]))
    dispatch[STRING[0]] = load_string
1340
1341    def load_binstring(self):
1342        # Deprecated BINSTRING uses signed 32-bit length
1343        len, = unpack('<i', self.read(4))
1344        if len < 0:
1345            raise UnpicklingError("BINSTRING pickle has negative byte count")
1346        data = self.read(len)
1347        self.append(self._decode_string(data))
1348    dispatch[BINSTRING[0]] = load_binstring
1349
1350    def load_binbytes(self):
1351        len, = unpack('<I', self.read(4))
1352        if len > maxsize:
1353            raise UnpicklingError("BINBYTES exceeds system's maximum size "
1354                                  "of %d bytes" % maxsize)
1355        self.append(self.read(len))
1356    dispatch[BINBYTES[0]] = load_binbytes
1357
    def load_unicode(self):
        # UNICODE: newline-terminated raw-unicode-escape text (protocol 0).
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode
1361
1362    def load_binunicode(self):
1363        len, = unpack('<I', self.read(4))
1364        if len > maxsize:
1365            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
1366                                  "of %d bytes" % maxsize)
1367        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1368    dispatch[BINUNICODE[0]] = load_binunicode
1369
1370    def load_binunicode8(self):
1371        len, = unpack('<Q', self.read(8))
1372        if len > maxsize:
1373            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
1374                                  "of %d bytes" % maxsize)
1375        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1376    dispatch[BINUNICODE8[0]] = load_binunicode8
1377
1378    def load_binbytes8(self):
1379        len, = unpack('<Q', self.read(8))
1380        if len > maxsize:
1381            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
1382                                  "of %d bytes" % maxsize)
1383        self.append(self.read(len))
1384    dispatch[BINBYTES8[0]] = load_binbytes8
1385
1386    def load_bytearray8(self):
1387        len, = unpack('<Q', self.read(8))
1388        if len > maxsize:
1389            raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
1390                                  "of %d bytes" % maxsize)
1391        b = bytearray(len)
1392        self.readinto(b)
1393        self.append(b)
1394    dispatch[BYTEARRAY8[0]] = load_bytearray8
1395
    def load_next_buffer(self):
        # NEXT_BUFFER: consume the next out-of-band buffer from *buffers*.
        if self._buffers is None:
            raise UnpicklingError("pickle stream refers to out-of-band data "
                                  "but no *buffers* argument was given")
        try:
            buf = next(self._buffers)
        except StopIteration:
            raise UnpicklingError("not enough out-of-band buffers")
        self.append(buf)
    dispatch[NEXT_BUFFER[0]] = load_next_buffer
1406
    def load_readonly_buffer(self):
        # READONLY_BUFFER: make the buffer on top of the stack read-only
        # (it was read-only on the pickling side).
        buf = self.stack[-1]
        with memoryview(buf) as m:
            if not m.readonly:
                self.stack[-1] = m.toreadonly()
    dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1413
1414    def load_short_binstring(self):
1415        len = self.read(1)[0]
1416        data = self.read(len)
1417        self.append(self._decode_string(data))
1418    dispatch[SHORT_BINSTRING[0]] = load_short_binstring
1419
1420    def load_short_binbytes(self):
1421        len = self.read(1)[0]
1422        self.append(self.read(len))
1423    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1424
1425    def load_short_binunicode(self):
1426        len = self.read(1)[0]
1427        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1428    dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1429
    def load_tuple(self):
        # TUPLE: collect everything since the last MARK into a tuple.
        items = self.pop_mark()
        self.append(tuple(items))
    dispatch[TUPLE[0]] = load_tuple
1434
    def load_empty_tuple(self):
        # EMPTY_TUPLE: push ().
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
1438
    def load_tuple1(self):
        # TUPLE1: wrap the top stack item in a 1-tuple.
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1[0]] = load_tuple1
1442
    def load_tuple2(self):
        # TUPLE2: replace the top two stack items with a 2-tuple.
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2[0]] = load_tuple2
1446
    def load_tuple3(self):
        # TUPLE3: replace the top three stack items with a 3-tuple.
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3[0]] = load_tuple3
1450
    def load_empty_list(self):
        # EMPTY_LIST: push [].
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list
1454
    def load_empty_dictionary(self):
        # EMPTY_DICT: push {}.
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary
1458
    def load_empty_set(self):
        # EMPTY_SET: push an empty set (protocol 4+).
        self.append(set())
    dispatch[EMPTY_SET[0]] = load_empty_set
1462
    def load_frozenset(self):
        # FROZENSET: collect everything since the last MARK into a frozenset.
        items = self.pop_mark()
        self.append(frozenset(items))
    dispatch[FROZENSET[0]] = load_frozenset
1467
    def load_list(self):
        # LIST: collect everything since the last MARK into a list.
        items = self.pop_mark()
        self.append(items)
    dispatch[LIST[0]] = load_list
1472
1473    def load_dict(self):
1474        items = self.pop_mark()
1475        d = {items[i]: items[i+1]
1476             for i in range(0, len(items), 2)}
1477        self.append(d)
1478    dispatch[DICT[0]] = load_dict
1479
    # INST and OBJ differ only in how they get a class object.  It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, args):
        # Call the constructor only when args were given, klass is not a
        # (new-style) type, or it opts in via __getinitargs__; otherwise
        # bypass __init__ with __new__, matching Python 2 semantics.
        if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
            try:
                value = klass(*args)
            except TypeError as err:
                # NOTE(review): the traceback is passed as a second
                # constructor argument rather than attached with
                # .with_traceback() -- looks like a historic wart; confirm
                # before changing, callers may inspect e.args.
                raise TypeError("in constructor for %s: %s" %
                                (klass.__name__, str(err)), sys.exc_info()[2])
        else:
            value = klass.__new__(klass)
        self.append(value)
1496
    def load_inst(self):
        # INST: module and class names on two ASCII text lines, arguments
        # on the stack since the last MARK.
        module = self.readline()[:-1].decode("ascii")
        name = self.readline()[:-1].decode("ascii")
        klass = self.find_class(module, name)
        self._instantiate(klass, self.pop_mark())
    dispatch[INST[0]] = load_inst
1503
    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        args = self.pop_mark()
        cls = args.pop(0)
        self._instantiate(cls, args)
    dispatch[OBJ[0]] = load_obj
1510
1511    def load_newobj(self):
1512        args = self.stack.pop()
1513        cls = self.stack.pop()
1514        obj = cls.__new__(cls, *args)
1515        self.append(obj)
1516    dispatch[NEWOBJ[0]] = load_newobj
1517
1518    def load_newobj_ex(self):
1519        kwargs = self.stack.pop()
1520        args = self.stack.pop()
1521        cls = self.stack.pop()
1522        obj = cls.__new__(cls, *args, **kwargs)
1523        self.append(obj)
1524    dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1525
    def load_global(self):
        # GLOBAL: module and qualified names on two UTF-8 text lines.
        module = self.readline()[:-1].decode("utf-8")
        name = self.readline()[:-1].decode("utf-8")
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL[0]] = load_global
1532
1533    def load_stack_global(self):
1534        name = self.stack.pop()
1535        module = self.stack.pop()
1536        if type(name) is not str or type(module) is not str:
1537            raise UnpicklingError("STACK_GLOBAL requires str")
1538        self.append(self.find_class(module, name))
1539    dispatch[STACK_GLOBAL[0]] = load_stack_global
1540
    def load_ext1(self):
        # EXT1: 1-byte copyreg extension registry code.
        code = self.read(1)[0]
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1
1545
    def load_ext2(self):
        # EXT2: 2-byte little-endian extension registry code.
        code, = unpack('<H', self.read(2))
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2
1550
    def load_ext4(self):
        # EXT4: 4-byte little-endian signed extension registry code.
        code, = unpack('<i', self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4
1555
1556    def get_extension(self, code):
1557        nil = []
1558        obj = _extension_cache.get(code, nil)
1559        if obj is not nil:
1560            self.append(obj)
1561            return
1562        key = _inverted_registry.get(code)
1563        if not key:
1564            if code <= 0: # note that 0 is forbidden
1565                # Corrupt or hostile pickle.
1566                raise UnpicklingError("EXT specifies code <= 0")
1567            raise ValueError("unregistered extension code %d" % code)
1568        obj = self.find_class(*key)
1569        _extension_cache[code] = obj
1570        self.append(obj)
1571
    def find_class(self, module, name):
        # Subclasses may override this.
        sys.audit('pickle.find_class', module, name)
        if self.proto < 3 and self.fix_imports:
            # Translate Python 2 module/class names to their Python 3 homes.
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            elif module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        if self.proto >= 4:
            # Protocol 4 names may be dotted qualnames; resolve each part.
            return _getattribute(sys.modules[module], name)[0]
        else:
            return getattr(sys.modules[module], name)
1585
1586    def load_reduce(self):
1587        stack = self.stack
1588        args = stack.pop()
1589        func = stack[-1]
1590        stack[-1] = func(*args)
1591    dispatch[REDUCE[0]] = load_reduce
1592
    def load_pop(self):
        # POP: discard the top stack item; an empty stack means the top
        # item is a MARK, so discard the whole mark frame instead.
        if self.stack:
            del self.stack[-1]
        else:
            self.pop_mark()
    dispatch[POP[0]] = load_pop
1599
    def load_pop_mark(self):
        # POP_MARK: discard everything down to and including the last MARK.
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark
1603
    def load_dup(self):
        # DUP: push another reference to the top stack item.
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup
1607
    def load_get(self):
        # GET: newline-terminated decimal memo index (protocol 0).
        i = int(self.readline()[:-1])
        try:
            self.append(self.memo[i])
        except KeyError:
            msg = f'Memo value not found at index {i}'
            raise UnpicklingError(msg) from None
    dispatch[GET[0]] = load_get
1616
1617    def load_binget(self):
1618        i = self.read(1)[0]
1619        try:
1620            self.append(self.memo[i])
1621        except KeyError as exc:
1622            msg = f'Memo value not found at index {i}'
1623            raise UnpicklingError(msg) from None
1624    dispatch[BINGET[0]] = load_binget
1625
1626    def load_long_binget(self):
1627        i, = unpack('<I', self.read(4))
1628        try:
1629            self.append(self.memo[i])
1630        except KeyError as exc:
1631            msg = f'Memo value not found at index {i}'
1632            raise UnpicklingError(msg) from None
1633    dispatch[LONG_BINGET[0]] = load_long_binget
1634
1635    def load_put(self):
1636        i = int(self.readline()[:-1])
1637        if i < 0:
1638            raise ValueError("negative PUT argument")
1639        self.memo[i] = self.stack[-1]
1640    dispatch[PUT[0]] = load_put
1641
1642    def load_binput(self):
1643        i = self.read(1)[0]
1644        if i < 0:
1645            raise ValueError("negative BINPUT argument")
1646        self.memo[i] = self.stack[-1]
1647    dispatch[BINPUT[0]] = load_binput
1648
1649    def load_long_binput(self):
1650        i, = unpack('<I', self.read(4))
1651        if i > maxsize:
1652            raise ValueError("negative LONG_BINPUT argument")
1653        self.memo[i] = self.stack[-1]
1654    dispatch[LONG_BINPUT[0]] = load_long_binput
1655
1656    def load_memoize(self):
1657        memo = self.memo
1658        memo[len(memo)] = self.stack[-1]
1659    dispatch[MEMOIZE[0]] = load_memoize
1660
1661    def load_append(self):
1662        stack = self.stack
1663        value = stack.pop()
1664        list = stack[-1]
1665        list.append(value)
1666    dispatch[APPEND[0]] = load_append
1667
1668    def load_appends(self):
1669        items = self.pop_mark()
1670        list_obj = self.stack[-1]
1671        try:
1672            extend = list_obj.extend
1673        except AttributeError:
1674            pass
1675        else:
1676            extend(items)
1677            return
1678        # Even if the PEP 307 requires extend() and append() methods,
1679        # fall back on append() if the object has no extend() method
1680        # for backward compatibility.
1681        append = list_obj.append
1682        for item in items:
1683            append(item)
1684    dispatch[APPENDS[0]] = load_appends
1685
1686    def load_setitem(self):
1687        stack = self.stack
1688        value = stack.pop()
1689        key = stack.pop()
1690        dict = stack[-1]
1691        dict[key] = value
1692    dispatch[SETITEM[0]] = load_setitem
1693
1694    def load_setitems(self):
1695        items = self.pop_mark()
1696        dict = self.stack[-1]
1697        for i in range(0, len(items), 2):
1698            dict[items[i]] = items[i + 1]
1699    dispatch[SETITEMS[0]] = load_setitems
1700
1701    def load_additems(self):
1702        items = self.pop_mark()
1703        set_obj = self.stack[-1]
1704        if isinstance(set_obj, set):
1705            set_obj.update(items)
1706        else:
1707            add = set_obj.add
1708            for item in items:
1709                add(item)
1710    dispatch[ADDITEMS[0]] = load_additems
1711
    def load_build(self):
        # BUILD: apply the popped state to the object left on the stack top.
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        # If the object defines __setstate__, it takes full control of
        # state restoration and nothing else below runs.
        setstate = getattr(inst, "__setstate__", None)
        if setstate is not None:
            setstate(state)
            return
        # Default path: state is either a plain dict destined for
        # __dict__, or a 2-tuple of (dict-for-__dict__, dict-for-slots).
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            inst_dict = inst.__dict__
            # Intern string keys: attribute names are interned by the
            # interpreter, so this saves memory and speeds later lookups.
            intern = sys.intern
            for k, v in state.items():
                if type(k) is str:
                    inst_dict[intern(k)] = v
                else:
                    inst_dict[k] = v
        if slotstate:
            # Slot values cannot be written via __dict__; use setattr so
            # the slot descriptors are exercised.
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD[0]] = load_build
1735
1736    def load_mark(self):
1737        self.metastack.append(self.stack)
1738        self.stack = []
1739        self.append = self.stack.append
1740    dispatch[MARK[0]] = load_mark
1741
1742    def load_stop(self):
1743        value = self.stack.pop()
1744        raise _Stop(value)
1745    dispatch[STOP[0]] = load_stop
1746
1747
1748# Shorthands
1749
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Write the pickled representation of obj to the open file object."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
1753
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return the pickled representation of obj as a bytes object."""
    buffer = io.BytesIO()
    _Pickler(buffer, protocol, fix_imports=fix_imports,
             buffer_callback=buffer_callback).dump(obj)
    result = buffer.getvalue()
    assert isinstance(result, bytes_types)
    return result
1761
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read and return one pickled object from the open file object."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
1766
def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Reconstruct and return an object from its pickled bytes-like form."""
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    return _Unpickler(io.BytesIO(s), fix_imports=fix_imports, buffers=buffers,
                      encoding=encoding, errors=errors).load()
1774
# Prefer the C-accelerated _pickle implementations when available; they
# are drop-in replacements for the pure-Python classes defined above.
try:
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # No C accelerator on this build: export the pure-Python versions
    # under the public names instead.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
1791
1792# Doctest
1793def _test():
1794    import doctest
1795    return doctest.testmod()
1796
if __name__ == "__main__":
    # Command-line interface: pretty-print pickle files, or run the
    # self-test suite with -t.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    elif args.pickle_file:
        import pprint
        for f in args.pickle_file:
            pprint.pprint(load(f))
    else:
        parser.print_help()
1821