1"""Create portable serialized representations of Python objects. 2 3See module copyreg for a mechanism for registering custom picklers. 4See module pickletools source for extensive comments. 5 6Classes: 7 8 Pickler 9 Unpickler 10 11Functions: 12 13 dump(object, file) 14 dumps(object) -> string 15 load(file) -> object 16 loads(string) -> object 17 18Misc variables: 19 20 __version__ 21 format_version 22 compatible_formats 23 24""" 25 26from types import FunctionType 27from copyreg import dispatch_table 28from copyreg import _extension_registry, _inverted_registry, _extension_cache 29from itertools import islice 30from functools import partial 31import sys 32from sys import maxsize 33from struct import pack, unpack 34import re 35import io 36import codecs 37import _compat_pickle 38 39__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", 40 "Unpickler", "dump", "dumps", "load", "loads"] 41 42# Shortcut for use in isinstance testing 43bytes_types = (bytes, bytearray) 44 45# These are purely informational; no code uses these. 46format_version = "4.0" # File format version we write 47compatible_formats = ["1.0", # Original protocol 0 48 "1.1", # Protocol 0 with INST added 49 "1.2", # Original protocol 1 50 "1.3", # Protocol 1 with BINFLOAT added 51 "2.0", # Protocol 2 52 "3.0", # Protocol 3 53 "4.0", # Protocol 4 54 ] # Old format versions we can read 55 56# This is the highest protocol number we know how to read. 57HIGHEST_PROTOCOL = 4 58 59# The protocol we write by default. May be less than HIGHEST_PROTOCOL. 60# We intentionally write a protocol that Python 2.x cannot read; 61# there are too many issues with that. 62DEFAULT_PROTOCOL = 3 63 64class PickleError(Exception): 65 """A common base class for the other pickling exceptions.""" 66 pass 67 68class PicklingError(PickleError): 69 """This exception is raised when an unpicklable object is passed to the 70 dump() method. 71 72 """ 73 pass 74 75class UnpicklingError(PickleError): 76 """This exception is raised when there is a problem unpickling an object, 77 such as a security violation. 78 79 Note that other exceptions may also be raised during unpickling, including 80 (but not necessarily limited to) AttributeError, EOFError, ImportError, 81 and IndexError. 82 83 """ 84 pass 85 86# An instance of _Stop is raised by Unpickler.load_stop() in response to 87# the STOP opcode, passing the object that is the result of unpickling. 88class _Stop(Exception): 89 def __init__(self, value): 90 self.value = value 91 92# Jython has PyStringMap; it's a dict subclass with string keys 93try: 94 from org.python.core import PyStringMap 95except ImportError: 96 PyStringMap = None 97 98# Pickle opcodes. See pickletools.py for extensive docs. The listing 99# here is in kind-of alphabetical order of 1-character pickle code. 100# pickletools groups them by purpose. 101 102MARK = b'(' # push special markobject on stack 103STOP = b'.' # every pickle ends with STOP 104POP = b'0' # discard topmost stack item 105POP_MARK = b'1' # discard stack top through topmost markobject 106DUP = b'2' # duplicate top stack item 107FLOAT = b'F' # push float object; decimal string argument 108INT = b'I' # push integer or bool; decimal string argument 109BININT = b'J' # push four-byte signed int 110BININT1 = b'K' # push 1-byte unsigned int 111LONG = b'L' # push long; decimal string argument 112BININT2 = b'M' # push 2-byte unsigned int 113NONE = b'N' # push None 114PERSID = b'P' # push persistent object; id is taken from string arg 115BINPERSID = b'Q' # " " " ; " " " " stack 116REDUCE = b'R' # apply callable to argtuple, both on stack 117STRING = b'S' # push string; NL-terminated string argument 118BINSTRING = b'T' # push string; counted binary string argument 119SHORT_BINSTRING= b'U' # " " ; " " " " < 256 bytes 120UNICODE = b'V' # push Unicode string; raw-unicode-escaped'd argument 121BINUNICODE = b'X' # " " " ; counted UTF-8 string argument 122APPEND = b'a' # append stack top to list below it 123BUILD = b'b' # call __setstate__ or __dict__.update() 124GLOBAL = b'c' # push self.find_class(modname, name); 2 string args 125DICT = b'd' # build a dict from stack items 126EMPTY_DICT = b'}' # push empty dict 127APPENDS = b'e' # extend list on stack by topmost stack slice 128GET = b'g' # push item from memo on stack; index is string arg 129BINGET = b'h' # " " " " " " ; " " 1-byte arg 130INST = b'i' # build & push class instance 131LONG_BINGET = b'j' # push item from memo on stack; index is 4-byte arg 132LIST = b'l' # build list from topmost stack items 133EMPTY_LIST = b']' # push empty list 134OBJ = b'o' # build & push class instance 135PUT = b'p' # store stack top in memo; index is string arg 136BINPUT = b'q' # " " " " " ; " " 1-byte arg 137LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg 138SETITEM = b's' # add key+value pair to dict 139TUPLE = b't' # build tuple from topmost stack items 140EMPTY_TUPLE = b')' # push empty tuple 141SETITEMS = b'u' # modify dict by adding topmost key+value pairs 142BINFLOAT = b'G' # push float; arg is 8-byte float encoding 143 144TRUE = b'I01\n' # not an opcode; see INT docs in pickletools.py 145FALSE = b'I00\n' # not an opcode; see INT docs in pickletools.py 146 147# Protocol 2 148 149PROTO = b'\x80' # identify pickle protocol 150NEWOBJ = b'\x81' # build object by applying cls.__new__ to argtuple 151EXT1 = b'\x82' # push object from extension registry; 1-byte index 152EXT2 = b'\x83' # ditto, but 2-byte index 153EXT4 = b'\x84' # ditto, but 4-byte index 154TUPLE1 = b'\x85' # build 1-tuple from stack top 155TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items 156TUPLE3 = b'\x87' # build 3-tuple from three topmost stack items 157NEWTRUE = b'\x88' # push True 158NEWFALSE = b'\x89' # push False 159LONG1 = b'\x8a' # push long from < 256 bytes 160LONG4 = b'\x8b' # push really big long 161 162_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3] 163 164# Protocol 3 (Python 3.x) 165 166BINBYTES = b'B' # push bytes; counted binary string argument 167SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes 168 169# Protocol 4 170SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes 171BINUNICODE8 = b'\x8d' # push very long string 172BINBYTES8 = b'\x8e' # push very long bytes string 173EMPTY_SET = b'\x8f' # push empty set on the stack 174ADDITEMS = b'\x90' # modify set by adding topmost stack items 175FROZENSET = b'\x91' # build frozenset from topmost stack items 176NEWOBJ_EX = b'\x92' # like NEWOBJ but work with keyword only arguments 177STACK_GLOBAL = b'\x93' # same as GLOBAL but using names on the stacks 178MEMOIZE = b'\x94' # store top of the stack in memo 179FRAME = b'\x95' # indicate the beginning of a new frame 180 181__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)]) 182 183 184class _Framer: 185 186 _FRAME_SIZE_MIN = 4 187 _FRAME_SIZE_TARGET = 64 * 1024 188 189 def __init__(self, file_write): 190 self.file_write = file_write 191 self.current_frame = None 192 193 def start_framing(self): 194 self.current_frame = io.BytesIO() 195 196 def end_framing(self): 197 if self.current_frame and self.current_frame.tell() > 0: 198 self.commit_frame(force=True) 199 self.current_frame = None 200 201 def commit_frame(self, force=False): 202 if self.current_frame: 203 f = self.current_frame 204 if f.tell() >= self._FRAME_SIZE_TARGET or force: 205 data = f.getbuffer() 206 write = self.file_write 207 if len(data) >= self._FRAME_SIZE_MIN: 208 # Issue a single call to the write method of the underlying 209 # file object for the frame opcode with the size of the 210 # frame. The concatenation is expected to be less expensive 211 # than issuing an additional call to write. 212 write(FRAME + pack("<Q", len(data))) 213 214 # Issue a separate call to write to append the frame 215 # contents without concatenation to the above to avoid a 216 # memory copy. 217 write(data) 218 219 # Start the new frame with a new io.BytesIO instance so that 220 # the file object can have delayed access to the previous frame 221 # contents via an unreleased memoryview of the previous 222 # io.BytesIO instance. 223 self.current_frame = io.BytesIO() 224 225 def write(self, data): 226 if self.current_frame: 227 return self.current_frame.write(data) 228 else: 229 return self.file_write(data) 230 231 def write_large_bytes(self, header, payload): 232 write = self.file_write 233 if self.current_frame: 234 # Terminate the current frame and flush it to the file. 235 self.commit_frame(force=True) 236 237 # Perform direct write of the header and payload of the large binary 238 # object. Be careful not to concatenate the header and the payload 239 # prior to calling 'write' as we do not want to allocate a large 240 # temporary bytes object. 241 # We intentionally do not insert a protocol 4 frame opcode to make 242 # it possible to optimize file.read calls in the loader. 243 write(header) 244 write(payload) 245 246 247class _Unframer: 248 249 def __init__(self, file_read, file_readline, file_tell=None): 250 self.file_read = file_read 251 self.file_readline = file_readline 252 self.current_frame = None 253 254 def read(self, n): 255 if self.current_frame: 256 data = self.current_frame.read(n) 257 if not data and n != 0: 258 self.current_frame = None 259 return self.file_read(n) 260 if len(data) < n: 261 raise UnpicklingError( 262 "pickle exhausted before end of frame") 263 return data 264 else: 265 return self.file_read(n) 266 267 def readline(self): 268 if self.current_frame: 269 data = self.current_frame.readline() 270 if not data: 271 self.current_frame = None 272 return self.file_readline() 273 if data[-1] != b'\n'[0]: 274 raise UnpicklingError( 275 "pickle exhausted before end of frame") 276 return data 277 else: 278 return self.file_readline() 279 280 def load_frame(self, frame_size): 281 if self.current_frame and self.current_frame.read() != b'': 282 raise UnpicklingError( 283 "beginning of a new frame before end of current frame") 284 self.current_frame = io.BytesIO(self.file_read(frame_size)) 285 286 287# Tools used for pickling. 288 289def _getattribute(obj, name): 290 for subpath in name.split('.'): 291 if subpath == '<locals>': 292 raise AttributeError("Can't get local attribute {!r} on {!r}" 293 .format(name, obj)) 294 try: 295 parent = obj 296 obj = getattr(obj, subpath) 297 except AttributeError: 298 raise AttributeError("Can't get attribute {!r} on {!r}" 299 .format(name, obj)) from None 300 return obj, parent 301 302def whichmodule(obj, name): 303 """Find the module an object belong to.""" 304 module_name = getattr(obj, '__module__', None) 305 if module_name is not None: 306 return module_name 307 # Protect the iteration by using a list copy of sys.modules against dynamic 308 # modules that trigger imports of other modules upon calls to getattr. 309 for module_name, module in list(sys.modules.items()): 310 if module_name == '__main__' or module is None: 311 continue 312 try: 313 if _getattribute(module, name)[0] is obj: 314 return module_name 315 except AttributeError: 316 pass 317 return '__main__' 318 319def encode_long(x): 320 r"""Encode a long to a two's complement little-endian binary string. 321 Note that 0 is a special case, returning an empty string, to save a 322 byte in the LONG1 pickling context. 323 324 >>> encode_long(0) 325 b'' 326 >>> encode_long(255) 327 b'\xff\x00' 328 >>> encode_long(32767) 329 b'\xff\x7f' 330 >>> encode_long(-256) 331 b'\x00\xff' 332 >>> encode_long(-32768) 333 b'\x00\x80' 334 >>> encode_long(-128) 335 b'\x80' 336 >>> encode_long(127) 337 b'\x7f' 338 >>> 339 """ 340 if x == 0: 341 return b'' 342 nbytes = (x.bit_length() >> 3) + 1 343 result = x.to_bytes(nbytes, byteorder='little', signed=True) 344 if x < 0 and nbytes > 1: 345 if result[-1] == 0xff and (result[-2] & 0x80) != 0: 346 result = result[:-1] 347 return result 348 349def decode_long(data): 350 r"""Decode a long from a two's complement little-endian binary string. 351 352 >>> decode_long(b'') 353 0 354 >>> decode_long(b"\xff\x00") 355 255 356 >>> decode_long(b"\xff\x7f") 357 32767 358 >>> decode_long(b"\x00\xff") 359 -256 360 >>> decode_long(b"\x00\x80") 361 -32768 362 >>> decode_long(b"\x80") 363 -128 364 >>> decode_long(b"\x7f") 365 127 366 """ 367 return int.from_bytes(data, byteorder='little', signed=True) 368 369 370# Pickling machinery 371 372class _Pickler: 373 374 def __init__(self, file, protocol=None, *, fix_imports=True): 375 """This takes a binary file for writing a pickle data stream. 376 377 The optional *protocol* argument tells the pickler to use the 378 given protocol; supported protocols are 0, 1, 2, 3 and 4. The 379 default protocol is 3; a backward-incompatible protocol designed 380 for Python 3. 381 382 Specifying a negative protocol version selects the highest 383 protocol version supported. The higher the protocol used, the 384 more recent the version of Python needed to read the pickle 385 produced. 386 387 The *file* argument must have a write() method that accepts a 388 single bytes argument. It can thus be a file object opened for 389 binary writing, an io.BytesIO instance, or any other custom 390 object that meets this interface. 391 392 If *fix_imports* is True and *protocol* is less than 3, pickle 393 will try to map the new Python 3 names to the old module names 394 used in Python 2, so that the pickle data stream is readable 395 with Python 2. 396 """ 397 if protocol is None: 398 protocol = DEFAULT_PROTOCOL 399 if protocol < 0: 400 protocol = HIGHEST_PROTOCOL 401 elif not 0 <= protocol <= HIGHEST_PROTOCOL: 402 raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL) 403 try: 404 self._file_write = file.write 405 except AttributeError: 406 raise TypeError("file must have a 'write' attribute") 407 self.framer = _Framer(self._file_write) 408 self.write = self.framer.write 409 self._write_large_bytes = self.framer.write_large_bytes 410 self.memo = {} 411 self.proto = int(protocol) 412 self.bin = protocol >= 1 413 self.fast = 0 414 self.fix_imports = fix_imports and protocol < 3 415 416 def clear_memo(self): 417 """Clears the pickler's "memo". 418 419 The memo is the data structure that remembers which objects the 420 pickler has already seen, so that shared or recursive objects 421 are pickled by reference and not by value. This method is 422 useful when re-using picklers. 423 """ 424 self.memo.clear() 425 426 def dump(self, obj): 427 """Write a pickled representation of obj to the open file.""" 428 # Check whether Pickler was initialized correctly. This is 429 # only needed to mimic the behavior of _pickle.Pickler.dump(). 430 if not hasattr(self, "_file_write"): 431 raise PicklingError("Pickler.__init__() was not called by " 432 "%s.__init__()" % (self.__class__.__name__,)) 433 if self.proto >= 2: 434 self.write(PROTO + pack("<B", self.proto)) 435 if self.proto >= 4: 436 self.framer.start_framing() 437 self.save(obj) 438 self.write(STOP) 439 self.framer.end_framing() 440 441 def memoize(self, obj): 442 """Store an object in the memo.""" 443 444 # The Pickler memo is a dictionary mapping object ids to 2-tuples 445 # that contain the Unpickler memo key and the object being memoized. 446 # The memo key is written to the pickle and will become 447 # the key in the Unpickler's memo. The object is stored in the 448 # Pickler memo so that transient objects are kept alive during 449 # pickling. 450 451 # The use of the Unpickler memo length as the memo key is just a 452 # convention. The only requirement is that the memo values be unique. 453 # But there appears no advantage to any other scheme, and this 454 # scheme allows the Unpickler memo to be implemented as a plain (but 455 # growable) array, indexed by memo key. 456 if self.fast: 457 return 458 assert id(obj) not in self.memo 459 idx = len(self.memo) 460 self.write(self.put(idx)) 461 self.memo[id(obj)] = idx, obj 462 463 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i. 464 def put(self, idx): 465 if self.proto >= 4: 466 return MEMOIZE 467 elif self.bin: 468 if idx < 256: 469 return BINPUT + pack("<B", idx) 470 else: 471 return LONG_BINPUT + pack("<I", idx) 472 else: 473 return PUT + repr(idx).encode("ascii") + b'\n' 474 475 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i. 476 def get(self, i): 477 if self.bin: 478 if i < 256: 479 return BINGET + pack("<B", i) 480 else: 481 return LONG_BINGET + pack("<I", i) 482 483 return GET + repr(i).encode("ascii") + b'\n' 484 485 def save(self, obj, save_persistent_id=True): 486 self.framer.commit_frame() 487 488 # Check for persistent id (defined by a subclass) 489 pid = self.persistent_id(obj) 490 if pid is not None and save_persistent_id: 491 self.save_pers(pid) 492 return 493 494 # Check the memo 495 x = self.memo.get(id(obj)) 496 if x is not None: 497 self.write(self.get(x[0])) 498 return 499 500 # Check the type dispatch table 501 t = type(obj) 502 f = self.dispatch.get(t) 503 if f is not None: 504 f(self, obj) # Call unbound method with explicit self 505 return 506 507 # Check private dispatch table if any, or else copyreg.dispatch_table 508 reduce = getattr(self, 'dispatch_table', dispatch_table).get(t) 509 if reduce is not None: 510 rv = reduce(obj) 511 else: 512 # Check for a class with a custom metaclass; treat as regular class 513 try: 514 issc = issubclass(t, type) 515 except TypeError: # t is not a class (old Boost; see SF #502085) 516 issc = False 517 if issc: 518 self.save_global(obj) 519 return 520 521 # Check for a __reduce_ex__ method, fall back to __reduce__ 522 reduce = getattr(obj, "__reduce_ex__", None) 523 if reduce is not None: 524 rv = reduce(self.proto) 525 else: 526 reduce = getattr(obj, "__reduce__", None) 527 if reduce is not None: 528 rv = reduce() 529 else: 530 raise PicklingError("Can't pickle %r object: %r" % 531 (t.__name__, obj)) 532 533 # Check for string returned by reduce(), meaning "save as global" 534 if isinstance(rv, str): 535 self.save_global(obj, rv) 536 return 537 538 # Assert that reduce() returned a tuple 539 if not isinstance(rv, tuple): 540 raise PicklingError("%s must return string or tuple" % reduce) 541 542 # Assert that it returned an appropriately sized tuple 543 l = len(rv) 544 if not (2 <= l <= 5): 545 raise PicklingError("Tuple returned by %s must have " 546 "two to five elements" % reduce) 547 548 # Save the reduce() output and finally memoize the object 549 self.save_reduce(obj=obj, *rv) 550 551 def persistent_id(self, obj): 552 # This exists so a subclass can override it 553 return None 554 555 def save_pers(self, pid): 556 # Save a persistent id reference 557 if self.bin: 558 self.save(pid, save_persistent_id=False) 559 self.write(BINPERSID) 560 else: 561 try: 562 self.write(PERSID + str(pid).encode("ascii") + b'\n') 563 except UnicodeEncodeError: 564 raise PicklingError( 565 "persistent IDs in protocol 0 must be ASCII strings") 566 567 def save_reduce(self, func, args, state=None, listitems=None, 568 dictitems=None, obj=None): 569 # This API is called by some subclasses 570 571 if not isinstance(args, tuple): 572 raise PicklingError("args from save_reduce() must be a tuple") 573 if not callable(func): 574 raise PicklingError("func from save_reduce() must be callable") 575 576 save = self.save 577 write = self.write 578 579 func_name = getattr(func, "__name__", "") 580 if self.proto >= 2 and func_name == "__newobj_ex__": 581 cls, args, kwargs = args 582 if not hasattr(cls, "__new__"): 583 raise PicklingError("args[0] from {} args has no __new__" 584 .format(func_name)) 585 if obj is not None and cls is not obj.__class__: 586 raise PicklingError("args[0] from {} args has the wrong class" 587 .format(func_name)) 588 if self.proto >= 4: 589 save(cls) 590 save(args) 591 save(kwargs) 592 write(NEWOBJ_EX) 593 else: 594 func = partial(cls.__new__, cls, *args, **kwargs) 595 save(func) 596 save(()) 597 write(REDUCE) 598 elif self.proto >= 2 and func_name == "__newobj__": 599 # A __reduce__ implementation can direct protocol 2 or newer to 600 # use the more efficient NEWOBJ opcode, while still 601 # allowing protocol 0 and 1 to work normally. For this to 602 # work, the function returned by __reduce__ should be 603 # called __newobj__, and its first argument should be a 604 # class. The implementation for __newobj__ 605 # should be as follows, although pickle has no way to 606 # verify this: 607 # 608 # def __newobj__(cls, *args): 609 # return cls.__new__(cls, *args) 610 # 611 # Protocols 0 and 1 will pickle a reference to __newobj__, 612 # while protocol 2 (and above) will pickle a reference to 613 # cls, the remaining args tuple, and the NEWOBJ code, 614 # which calls cls.__new__(cls, *args) at unpickling time 615 # (see load_newobj below). If __reduce__ returns a 616 # three-tuple, the state from the third tuple item will be 617 # pickled regardless of the protocol, calling __setstate__ 618 # at unpickling time (see load_build below). 619 # 620 # Note that no standard __newobj__ implementation exists; 621 # you have to provide your own. This is to enforce 622 # compatibility with Python 2.2 (pickles written using 623 # protocol 0 or 1 in Python 2.3 should be unpicklable by 624 # Python 2.2). 625 cls = args[0] 626 if not hasattr(cls, "__new__"): 627 raise PicklingError( 628 "args[0] from __newobj__ args has no __new__") 629 if obj is not None and cls is not obj.__class__: 630 raise PicklingError( 631 "args[0] from __newobj__ args has the wrong class") 632 args = args[1:] 633 save(cls) 634 save(args) 635 write(NEWOBJ) 636 else: 637 save(func) 638 save(args) 639 write(REDUCE) 640 641 if obj is not None: 642 # If the object is already in the memo, this means it is 643 # recursive. In this case, throw away everything we put on the 644 # stack, and fetch the object back from the memo. 645 if id(obj) in self.memo: 646 write(POP + self.get(self.memo[id(obj)][0])) 647 else: 648 self.memoize(obj) 649 650 # More new special cases (that work with older protocols as 651 # well): when __reduce__ returns a tuple with 4 or 5 items, 652 # the 4th and 5th item should be iterators that provide list 653 # items and dict items (as (key, value) tuples), or None. 654 655 if listitems is not None: 656 self._batch_appends(listitems) 657 658 if dictitems is not None: 659 self._batch_setitems(dictitems) 660 661 if state is not None: 662 save(state) 663 write(BUILD) 664 665 # Methods below this point are dispatched through the dispatch table 666 667 dispatch = {} 668 669 def save_none(self, obj): 670 self.write(NONE) 671 dispatch[type(None)] = save_none 672 673 def save_bool(self, obj): 674 if self.proto >= 2: 675 self.write(NEWTRUE if obj else NEWFALSE) 676 else: 677 self.write(TRUE if obj else FALSE) 678 dispatch[bool] = save_bool 679 680 def save_long(self, obj): 681 if self.bin: 682 # If the int is small enough to fit in a signed 4-byte 2's-comp 683 # format, we can store it more efficiently than the general 684 # case. 685 # First one- and two-byte unsigned ints: 686 if obj >= 0: 687 if obj <= 0xff: 688 self.write(BININT1 + pack("<B", obj)) 689 return 690 if obj <= 0xffff: 691 self.write(BININT2 + pack("<H", obj)) 692 return 693 # Next check for 4-byte signed ints: 694 if -0x80000000 <= obj <= 0x7fffffff: 695 self.write(BININT + pack("<i", obj)) 696 return 697 if self.proto >= 2: 698 encoded = encode_long(obj) 699 n = len(encoded) 700 if n < 256: 701 self.write(LONG1 + pack("<B", n) + encoded) 702 else: 703 self.write(LONG4 + pack("<i", n) + encoded) 704 return 705 if -0x80000000 <= obj <= 0x7fffffff: 706 self.write(INT + repr(obj).encode("ascii") + b'\n') 707 else: 708 self.write(LONG + repr(obj).encode("ascii") + b'L\n') 709 dispatch[int] = save_long 710 711 def save_float(self, obj): 712 if self.bin: 713 self.write(BINFLOAT + pack('>d', obj)) 714 else: 715 self.write(FLOAT + repr(obj).encode("ascii") + b'\n') 716 dispatch[float] = save_float 717 718 def save_bytes(self, obj): 719 if self.proto < 3: 720 if not obj: # bytes object is empty 721 self.save_reduce(bytes, (), obj=obj) 722 else: 723 self.save_reduce(codecs.encode, 724 (str(obj, 'latin1'), 'latin1'), obj=obj) 725 return 726 n = len(obj) 727 if n <= 0xff: 728 self.write(SHORT_BINBYTES + pack("<B", n) + obj) 729 elif n > 0xffffffff and self.proto >= 4: 730 self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj) 731 elif n >= self.framer._FRAME_SIZE_TARGET: 732 self._write_large_bytes(BINBYTES + pack("<I", n), obj) 733 else: 734 self.write(BINBYTES + pack("<I", n) + obj) 735 self.memoize(obj) 736 dispatch[bytes] = save_bytes 737 738 def save_str(self, obj): 739 if self.bin: 740 encoded = obj.encode('utf-8', 'surrogatepass') 741 n = len(encoded) 742 if n <= 0xff and self.proto >= 4: 743 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded) 744 elif n > 0xffffffff and self.proto >= 4: 745 self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded) 746 elif n >= self.framer._FRAME_SIZE_TARGET: 747 self._write_large_bytes(BINUNICODE + pack("<I", n), encoded) 748 else: 749 self.write(BINUNICODE + pack("<I", n) + encoded) 750 else: 751 obj = obj.replace("\\", "\\u005c") 752 obj = obj.replace("\0", "\\u0000") 753 obj = obj.replace("\n", "\\u000a") 754 obj = obj.replace("\r", "\\u000d") 755 obj = obj.replace("\x1a", "\\u001a") # EOF on DOS 756 self.write(UNICODE + obj.encode('raw-unicode-escape') + 757 b'\n') 758 self.memoize(obj) 759 dispatch[str] = save_str 760 761 def save_tuple(self, obj): 762 if not obj: # tuple is empty 763 if self.bin: 764 self.write(EMPTY_TUPLE) 765 else: 766 self.write(MARK + TUPLE) 767 return 768 769 n = len(obj) 770 save = self.save 771 memo = self.memo 772 if n <= 3 and self.proto >= 2: 773 for element in obj: 774 save(element) 775 # Subtle. Same as in the big comment below. 776 if id(obj) in memo: 777 get = self.get(memo[id(obj)][0]) 778 self.write(POP * n + get) 779 else: 780 self.write(_tuplesize2code[n]) 781 self.memoize(obj) 782 return 783 784 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple 785 # has more than 3 elements. 786 write = self.write 787 write(MARK) 788 for element in obj: 789 save(element) 790 791 if id(obj) in memo: 792 # Subtle. d was not in memo when we entered save_tuple(), so 793 # the process of saving the tuple's elements must have saved 794 # the tuple itself: the tuple is recursive. The proper action 795 # now is to throw away everything we put on the stack, and 796 # simply GET the tuple (it's already constructed). This check 797 # could have been done in the "for element" loop instead, but 798 # recursive tuples are a rare thing. 799 get = self.get(memo[id(obj)][0]) 800 if self.bin: 801 write(POP_MARK + get) 802 else: # proto 0 -- POP_MARK not available 803 write(POP * (n+1) + get) 804 return 805 806 # No recursion. 807 write(TUPLE) 808 self.memoize(obj) 809 810 dispatch[tuple] = save_tuple 811 812 def save_list(self, obj): 813 if self.bin: 814 self.write(EMPTY_LIST) 815 else: # proto 0 -- can't use EMPTY_LIST 816 self.write(MARK + LIST) 817 818 self.memoize(obj) 819 self._batch_appends(obj) 820 821 dispatch[list] = save_list 822 823 _BATCHSIZE = 1000 824 825 def _batch_appends(self, items): 826 # Helper to batch up APPENDS sequences 827 save = self.save 828 write = self.write 829 830 if not self.bin: 831 for x in items: 832 save(x) 833 write(APPEND) 834 return 835 836 it = iter(items) 837 while True: 838 tmp = list(islice(it, self._BATCHSIZE)) 839 n = len(tmp) 840 if n > 1: 841 write(MARK) 842 for x in tmp: 843 save(x) 844 write(APPENDS) 845 elif n: 846 save(tmp[0]) 847 write(APPEND) 848 # else tmp is empty, and we're done 849 if n < self._BATCHSIZE: 850 return 851 852 def save_dict(self, obj): 853 if self.bin: 854 self.write(EMPTY_DICT) 855 else: # proto 0 -- can't use EMPTY_DICT 856 self.write(MARK + DICT) 857 858 self.memoize(obj) 859 self._batch_setitems(obj.items()) 860 861 dispatch[dict] = save_dict 862 if PyStringMap is not None: 863 dispatch[PyStringMap] = save_dict 864 865 def _batch_setitems(self, items): 866 # Helper to batch up SETITEMS sequences; proto >= 1 only 867 save = self.save 868 write = self.write 869 870 if not self.bin: 871 for k, v in items: 872 save(k) 873 save(v) 874 write(SETITEM) 875 return 876 877 it = iter(items) 878 while True: 879 tmp = list(islice(it, self._BATCHSIZE)) 880 n = len(tmp) 881 if n > 1: 882 write(MARK) 883 for k, v in tmp: 884 save(k) 885 save(v) 886 write(SETITEMS) 887 elif n: 888 k, v = tmp[0] 889 save(k) 890 save(v) 891 write(SETITEM) 892 # else tmp is empty, and we're done 893 if n < self._BATCHSIZE: 894 return 895 896 def save_set(self, obj): 897 save = self.save 898 write = self.write 899 900 if self.proto < 4: 901 self.save_reduce(set, (list(obj),), obj=obj) 902 return 903 904 write(EMPTY_SET) 905 self.memoize(obj) 906 907 it = iter(obj) 908 while True: 909 batch = list(islice(it, self._BATCHSIZE)) 910 n = len(batch) 911 if n > 0: 912 write(MARK) 913 for item in batch: 914 save(item) 915 write(ADDITEMS) 916 if n < self._BATCHSIZE: 917 return 918 dispatch[set] = save_set 919 920 def save_frozenset(self, obj): 921 save = self.save 922 write = self.write 923 924 if self.proto < 4: 925 self.save_reduce(frozenset, (list(obj),), obj=obj) 926 return 927 928 write(MARK) 929 for item in obj: 930 save(item) 931 932 if id(obj) in self.memo: 933 # If the object is already in the memo, this means it is 934 # recursive. In this case, throw away everything we put on the 935 # stack, and fetch the object back from the memo. 936 write(POP_MARK + self.get(self.memo[id(obj)][0])) 937 return 938 939 write(FROZENSET) 940 self.memoize(obj) 941 dispatch[frozenset] = save_frozenset 942 943 def save_global(self, obj, name=None): 944 write = self.write 945 memo = self.memo 946 947 if name is None: 948 name = getattr(obj, '__qualname__', None) 949 if name is None: 950 name = obj.__name__ 951 952 module_name = whichmodule(obj, name) 953 try: 954 __import__(module_name, level=0) 955 module = sys.modules[module_name] 956 obj2, parent = _getattribute(module, name) 957 except (ImportError, KeyError, AttributeError): 958 raise PicklingError( 959 "Can't pickle %r: it's not found as %s.%s" % 960 (obj, module_name, name)) from None 961 else: 962 if obj2 is not obj: 963 raise PicklingError( 964 "Can't pickle %r: it's not the same object as %s.%s" % 965 (obj, module_name, name)) 966 967 if self.proto >= 2: 968 code = _extension_registry.get((module_name, name)) 969 if code: 970 assert code > 0 971 if code <= 0xff: 972 write(EXT1 + pack("<B", code)) 973 elif code <= 0xffff: 974 write(EXT2 + pack("<H", code)) 975 else: 976 write(EXT4 + pack("<i", code)) 977 return 978 lastname = name.rpartition('.')[2] 979 if parent is module: 980 name = lastname 981 # Non-ASCII identifiers are supported only with protocols >= 3. 982 if self.proto >= 4: 983 self.save(module_name) 984 self.save(name) 985 write(STACK_GLOBAL) 986 elif parent is not module: 987 self.save_reduce(getattr, (parent, lastname)) 988 elif self.proto >= 3: 989 write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + 990 bytes(name, "utf-8") + b'\n') 991 else: 992 if self.fix_imports: 993 r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING 994 r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING 995 if (module_name, name) in r_name_mapping: 996 module_name, name = r_name_mapping[(module_name, name)] 997 elif module_name in r_import_mapping: 998 module_name = r_import_mapping[module_name] 999 try: 1000 write(GLOBAL + bytes(module_name, "ascii") + b'\n' + 1001 bytes(name, "ascii") + b'\n') 1002 except UnicodeEncodeError: 1003 raise PicklingError( 1004 "can't pickle global identifier '%s.%s' using " 1005 "pickle protocol %i" % (module, name, self.proto)) from None 1006 1007 self.memoize(obj) 1008 1009 def save_type(self, obj): 1010 if obj is type(None): 1011 return self.save_reduce(type, (None,), obj=obj) 1012 elif obj is type(NotImplemented): 1013 return self.save_reduce(type, (NotImplemented,), obj=obj) 1014 elif obj is type(...): 1015 return self.save_reduce(type, (...,), obj=obj) 1016 return self.save_global(obj) 1017 1018 dispatch[FunctionType] = save_global 1019 dispatch[type] = save_type 1020 1021 1022# Unpickling machinery 1023 1024class _Unpickler: 1025 1026 def __init__(self, file, *, fix_imports=True, 1027 encoding="ASCII", errors="strict"): 1028 """This takes a binary file for reading a pickle data stream. 1029 1030 The protocol version of the pickle is detected automatically, so 1031 no proto argument is needed. 1032 1033 The argument *file* must have two methods, a read() method that 1034 takes an integer argument, and a readline() method that requires 1035 no arguments. Both methods should return bytes. Thus *file* 1036 can be a binary file object opened for reading, an io.BytesIO 1037 object, or any other custom object that meets this interface. 1038 1039 The file-like object must have two methods, a read() method 1040 that takes an integer argument, and a readline() method that 1041 requires no arguments. Both methods should return bytes. 1042 Thus file-like object can be a binary file object opened for 1043 reading, a BytesIO object, or any other custom object that 1044 meets this interface. 1045 1046 Optional keyword arguments are *fix_imports*, *encoding* and 1047 *errors*, which are used to control compatibility support for 1048 pickle stream generated by Python 2. If *fix_imports* is True, 1049 pickle will try to map the old Python 2 names to the new names 1050 used in Python 3. The *encoding* and *errors* tell pickle how 1051 to decode 8-bit string instances pickled by Python 2; these 1052 default to 'ASCII' and 'strict', respectively. *encoding* can be 1053 'bytes' to read theses 8-bit string instances as bytes objects. 1054 """ 1055 self._file_readline = file.readline 1056 self._file_read = file.read 1057 self.memo = {} 1058 self.encoding = encoding 1059 self.errors = errors 1060 self.proto = 0 1061 self.fix_imports = fix_imports 1062 1063 def load(self): 1064 """Read a pickled object representation from the open file. 1065 1066 Return the reconstituted object hierarchy specified in the file. 1067 """ 1068 # Check whether Unpickler was initialized correctly. This is 1069 # only needed to mimic the behavior of _pickle.Unpickler.dump(). 1070 if not hasattr(self, "_file_read"): 1071 raise UnpicklingError("Unpickler.__init__() was not called by " 1072 "%s.__init__()" % (self.__class__.__name__,)) 1073 self._unframer = _Unframer(self._file_read, self._file_readline) 1074 self.read = self._unframer.read 1075 self.readline = self._unframer.readline 1076 self.metastack = [] 1077 self.stack = [] 1078 self.append = self.stack.append 1079 self.proto = 0 1080 read = self.read 1081 dispatch = self.dispatch 1082 try: 1083 while True: 1084 key = read(1) 1085 if not key: 1086 raise EOFError 1087 assert isinstance(key, bytes_types) 1088 dispatch[key[0]](self) 1089 except _Stop as stopinst: 1090 return stopinst.value 1091 1092 # Return a list of items pushed in the stack after last MARK instruction. 1093 def pop_mark(self): 1094 items = self.stack 1095 self.stack = self.metastack.pop() 1096 self.append = self.stack.append 1097 return items 1098 1099 def persistent_load(self, pid): 1100 raise UnpicklingError("unsupported persistent id encountered") 1101 1102 dispatch = {} 1103 1104 def load_proto(self): 1105 proto = self.read(1)[0] 1106 if not 0 <= proto <= HIGHEST_PROTOCOL: 1107 raise ValueError("unsupported pickle protocol: %d" % proto) 1108 self.proto = proto 1109 dispatch[PROTO[0]] = load_proto 1110 1111 def load_frame(self): 1112 frame_size, = unpack('<Q', self.read(8)) 1113 if frame_size > sys.maxsize: 1114 raise ValueError("frame size > sys.maxsize: %d" % frame_size) 1115 self._unframer.load_frame(frame_size) 1116 dispatch[FRAME[0]] = load_frame 1117 1118 def load_persid(self): 1119 try: 1120 pid = self.readline()[:-1].decode("ascii") 1121 except UnicodeDecodeError: 1122 raise UnpicklingError( 1123 "persistent IDs in protocol 0 must be ASCII strings") 1124 self.append(self.persistent_load(pid)) 1125 dispatch[PERSID[0]] = load_persid 1126 1127 def load_binpersid(self): 1128 pid = self.stack.pop() 1129 self.append(self.persistent_load(pid)) 1130 dispatch[BINPERSID[0]] = load_binpersid 1131 1132 def load_none(self): 1133 self.append(None) 1134 dispatch[NONE[0]] = load_none 1135 1136 def load_false(self): 1137 self.append(False) 1138 dispatch[NEWFALSE[0]] = load_false 1139 1140 def load_true(self): 1141 self.append(True) 1142 dispatch[NEWTRUE[0]] = load_true 1143 1144 def load_int(self): 1145 data = self.readline() 1146 if data == FALSE[1:]: 1147 val = False 1148 elif data == TRUE[1:]: 1149 val = True 1150 else: 1151 val = int(data, 0) 1152 self.append(val) 1153 dispatch[INT[0]] = load_int 1154 1155 def load_binint(self): 1156 self.append(unpack('<i', self.read(4))[0]) 1157 dispatch[BININT[0]] = load_binint 1158 1159 def load_binint1(self): 1160 self.append(self.read(1)[0]) 1161 dispatch[BININT1[0]] = load_binint1 1162 1163 def load_binint2(self): 1164 self.append(unpack('<H', self.read(2))[0]) 1165 dispatch[BININT2[0]] = load_binint2 1166 1167 def load_long(self): 1168 val = self.readline()[:-1] 1169 if val and val[-1] == b'L'[0]: 1170 val = val[:-1] 1171 self.append(int(val, 0)) 1172 dispatch[LONG[0]] = load_long 1173 1174 def load_long1(self): 1175 n = self.read(1)[0] 1176 data = self.read(n) 1177 self.append(decode_long(data)) 1178 dispatch[LONG1[0]] = load_long1 1179 1180 def load_long4(self): 1181 n, = unpack('<i', self.read(4)) 1182 if n < 0: 1183 # Corrupt or hostile pickle -- we never write one like this 1184 raise UnpicklingError("LONG pickle has negative byte count") 1185 data = self.read(n) 1186 self.append(decode_long(data)) 1187 dispatch[LONG4[0]] = load_long4 1188 1189 def load_float(self): 1190 self.append(float(self.readline()[:-1])) 1191 dispatch[FLOAT[0]] = load_float 1192 1193 def load_binfloat(self): 1194 self.append(unpack('>d', self.read(8))[0]) 1195 dispatch[BINFLOAT[0]] = load_binfloat 1196 1197 def _decode_string(self, value): 1198 # Used to allow strings from Python 2 to be decoded either as 1199 # bytes or Unicode strings. This should be used only with the 1200 # STRING, BINSTRING and SHORT_BINSTRING opcodes. 1201 if self.encoding == "bytes": 1202 return value 1203 else: 1204 return value.decode(self.encoding, self.errors) 1205 1206 def load_string(self): 1207 data = self.readline()[:-1] 1208 # Strip outermost quotes 1209 if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'': 1210 data = data[1:-1] 1211 else: 1212 raise UnpicklingError("the STRING opcode argument must be quoted") 1213 self.append(self._decode_string(codecs.escape_decode(data)[0])) 1214 dispatch[STRING[0]] = load_string 1215 1216 def load_binstring(self): 1217 # Deprecated BINSTRING uses signed 32-bit length 1218 len, = unpack('<i', self.read(4)) 1219 if len < 0: 1220 raise UnpicklingError("BINSTRING pickle has negative byte count") 1221 data = self.read(len) 1222 self.append(self._decode_string(data)) 1223 dispatch[BINSTRING[0]] = load_binstring 1224 1225 def load_binbytes(self): 1226 len, = unpack('<I', self.read(4)) 1227 if len > maxsize: 1228 raise UnpicklingError("BINBYTES exceeds system's maximum size " 1229 "of %d bytes" % maxsize) 1230 self.append(self.read(len)) 1231 dispatch[BINBYTES[0]] = load_binbytes 1232 1233 def load_unicode(self): 1234 self.append(str(self.readline()[:-1], 'raw-unicode-escape')) 1235 dispatch[UNICODE[0]] = load_unicode 1236 1237 def load_binunicode(self): 1238 len, = unpack('<I', self.read(4)) 1239 if len > maxsize: 1240 raise UnpicklingError("BINUNICODE exceeds system's maximum size " 1241 "of %d bytes" % maxsize) 1242 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1243 dispatch[BINUNICODE[0]] = load_binunicode 1244 1245 def load_binunicode8(self): 1246 len, = unpack('<Q', self.read(8)) 1247 if len > maxsize: 1248 raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " 1249 "of %d bytes" % maxsize) 1250 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1251 dispatch[BINUNICODE8[0]] = load_binunicode8 1252 1253 def load_binbytes8(self): 1254 len, = unpack('<Q', self.read(8)) 1255 if len > maxsize: 1256 raise UnpicklingError("BINBYTES8 exceeds system's maximum size " 1257 "of %d bytes" % maxsize) 1258 self.append(self.read(len)) 1259 dispatch[BINBYTES8[0]] = load_binbytes8 1260 1261 def load_short_binstring(self): 1262 len = self.read(1)[0] 1263 data = self.read(len) 1264 self.append(self._decode_string(data)) 1265 dispatch[SHORT_BINSTRING[0]] = load_short_binstring 1266 1267 def load_short_binbytes(self): 1268 len = self.read(1)[0] 1269 self.append(self.read(len)) 1270 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes 1271 1272 def load_short_binunicode(self): 1273 len = self.read(1)[0] 1274 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1275 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode 1276 1277 def load_tuple(self): 1278 items = self.pop_mark() 1279 self.append(tuple(items)) 1280 dispatch[TUPLE[0]] = load_tuple 1281 1282 def load_empty_tuple(self): 1283 self.append(()) 1284 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple 1285 1286 def load_tuple1(self): 1287 self.stack[-1] = (self.stack[-1],) 1288 dispatch[TUPLE1[0]] = load_tuple1 1289 1290 def load_tuple2(self): 1291 self.stack[-2:] = [(self.stack[-2], self.stack[-1])] 1292 dispatch[TUPLE2[0]] = load_tuple2 1293 1294 def load_tuple3(self): 1295 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] 1296 dispatch[TUPLE3[0]] = load_tuple3 1297 1298 def load_empty_list(self): 1299 self.append([]) 1300 dispatch[EMPTY_LIST[0]] = load_empty_list 1301 1302 def load_empty_dictionary(self): 1303 self.append({}) 1304 dispatch[EMPTY_DICT[0]] = load_empty_dictionary 1305 1306 def load_empty_set(self): 1307 self.append(set()) 1308 dispatch[EMPTY_SET[0]] = load_empty_set 1309 1310 def load_frozenset(self): 1311 items = self.pop_mark() 1312 self.append(frozenset(items)) 1313 dispatch[FROZENSET[0]] = load_frozenset 1314 1315 def load_list(self): 1316 items = self.pop_mark() 1317 self.append(items) 1318 dispatch[LIST[0]] = load_list 1319 1320 def load_dict(self): 1321 items = self.pop_mark() 1322 d = {items[i]: items[i+1] 1323 for i in range(0, len(items), 2)} 1324 self.append(d) 1325 dispatch[DICT[0]] = load_dict 1326 1327 # INST and OBJ differ only in how they get a class object. It's not 1328 # only sensible to do the rest in a common routine, the two routines 1329 # previously diverged and grew different bugs. 1330 # klass is the class to instantiate, and k points to the topmost mark 1331 # object, following which are the arguments for klass.__init__. 1332 def _instantiate(self, klass, args): 1333 if (args or not isinstance(klass, type) or 1334 hasattr(klass, "__getinitargs__")): 1335 try: 1336 value = klass(*args) 1337 except TypeError as err: 1338 raise TypeError("in constructor for %s: %s" % 1339 (klass.__name__, str(err)), sys.exc_info()[2]) 1340 else: 1341 value = klass.__new__(klass) 1342 self.append(value) 1343 1344 def load_inst(self): 1345 module = self.readline()[:-1].decode("ascii") 1346 name = self.readline()[:-1].decode("ascii") 1347 klass = self.find_class(module, name) 1348 self._instantiate(klass, self.pop_mark()) 1349 dispatch[INST[0]] = load_inst 1350 1351 def load_obj(self): 1352 # Stack is ... markobject classobject arg1 arg2 ... 1353 args = self.pop_mark() 1354 cls = args.pop(0) 1355 self._instantiate(cls, args) 1356 dispatch[OBJ[0]] = load_obj 1357 1358 def load_newobj(self): 1359 args = self.stack.pop() 1360 cls = self.stack.pop() 1361 obj = cls.__new__(cls, *args) 1362 self.append(obj) 1363 dispatch[NEWOBJ[0]] = load_newobj 1364 1365 def load_newobj_ex(self): 1366 kwargs = self.stack.pop() 1367 args = self.stack.pop() 1368 cls = self.stack.pop() 1369 obj = cls.__new__(cls, *args, **kwargs) 1370 self.append(obj) 1371 dispatch[NEWOBJ_EX[0]] = load_newobj_ex 1372 1373 def load_global(self): 1374 module = self.readline()[:-1].decode("utf-8") 1375 name = self.readline()[:-1].decode("utf-8") 1376 klass = self.find_class(module, name) 1377 self.append(klass) 1378 dispatch[GLOBAL[0]] = load_global 1379 1380 def load_stack_global(self): 1381 name = self.stack.pop() 1382 module = self.stack.pop() 1383 if type(name) is not str or type(module) is not str: 1384 raise UnpicklingError("STACK_GLOBAL requires str") 1385 self.append(self.find_class(module, name)) 1386 dispatch[STACK_GLOBAL[0]] = load_stack_global 1387 1388 def load_ext1(self): 1389 code = self.read(1)[0] 1390 self.get_extension(code) 1391 dispatch[EXT1[0]] = load_ext1 1392 1393 def load_ext2(self): 1394 code, = unpack('<H', self.read(2)) 1395 self.get_extension(code) 1396 dispatch[EXT2[0]] = load_ext2 1397 1398 def load_ext4(self): 1399 code, = unpack('<i', self.read(4)) 1400 self.get_extension(code) 1401 dispatch[EXT4[0]] = load_ext4 1402 1403 def get_extension(self, code): 1404 nil = [] 1405 obj = _extension_cache.get(code, nil) 1406 if obj is not nil: 1407 self.append(obj) 1408 return 1409 key = _inverted_registry.get(code) 1410 if not key: 1411 if code <= 0: # note that 0 is forbidden 1412 # Corrupt or hostile pickle. 1413 raise UnpicklingError("EXT specifies code <= 0") 1414 raise ValueError("unregistered extension code %d" % code) 1415 obj = self.find_class(*key) 1416 _extension_cache[code] = obj 1417 self.append(obj) 1418 1419 def find_class(self, module, name): 1420 # Subclasses may override this. 1421 if self.proto < 3 and self.fix_imports: 1422 if (module, name) in _compat_pickle.NAME_MAPPING: 1423 module, name = _compat_pickle.NAME_MAPPING[(module, name)] 1424 elif module in _compat_pickle.IMPORT_MAPPING: 1425 module = _compat_pickle.IMPORT_MAPPING[module] 1426 __import__(module, level=0) 1427 if self.proto >= 4: 1428 return _getattribute(sys.modules[module], name)[0] 1429 else: 1430 return getattr(sys.modules[module], name) 1431 1432 def load_reduce(self): 1433 stack = self.stack 1434 args = stack.pop() 1435 func = stack[-1] 1436 stack[-1] = func(*args) 1437 dispatch[REDUCE[0]] = load_reduce 1438 1439 def load_pop(self): 1440 if self.stack: 1441 del self.stack[-1] 1442 else: 1443 self.pop_mark() 1444 dispatch[POP[0]] = load_pop 1445 1446 def load_pop_mark(self): 1447 self.pop_mark() 1448 dispatch[POP_MARK[0]] = load_pop_mark 1449 1450 def load_dup(self): 1451 self.append(self.stack[-1]) 1452 dispatch[DUP[0]] = load_dup 1453 1454 def load_get(self): 1455 i = int(self.readline()[:-1]) 1456 self.append(self.memo[i]) 1457 dispatch[GET[0]] = load_get 1458 1459 def load_binget(self): 1460 i = self.read(1)[0] 1461 self.append(self.memo[i]) 1462 dispatch[BINGET[0]] = load_binget 1463 1464 def load_long_binget(self): 1465 i, = unpack('<I', self.read(4)) 1466 self.append(self.memo[i]) 1467 dispatch[LONG_BINGET[0]] = load_long_binget 1468 1469 def load_put(self): 1470 i = int(self.readline()[:-1]) 1471 if i < 0: 1472 raise ValueError("negative PUT argument") 1473 self.memo[i] = self.stack[-1] 1474 dispatch[PUT[0]] = load_put 1475 1476 def load_binput(self): 1477 i = self.read(1)[0] 1478 if i < 0: 1479 raise ValueError("negative BINPUT argument") 1480 self.memo[i] = self.stack[-1] 1481 dispatch[BINPUT[0]] = load_binput 1482 1483 def load_long_binput(self): 1484 i, = unpack('<I', self.read(4)) 1485 if i > maxsize: 1486 raise ValueError("negative LONG_BINPUT argument") 1487 self.memo[i] = self.stack[-1] 1488 dispatch[LONG_BINPUT[0]] = load_long_binput 1489 1490 def load_memoize(self): 1491 memo = self.memo 1492 memo[len(memo)] = self.stack[-1] 1493 dispatch[MEMOIZE[0]] = load_memoize 1494 1495 def load_append(self): 1496 stack = self.stack 1497 value = stack.pop() 1498 list = stack[-1] 1499 list.append(value) 1500 dispatch[APPEND[0]] = load_append 1501 1502 def load_appends(self): 1503 items = self.pop_mark() 1504 list_obj = self.stack[-1] 1505 try: 1506 extend = list_obj.extend 1507 except AttributeError: 1508 pass 1509 else: 1510 extend(items) 1511 return 1512 # Even if the PEP 307 requires extend() and append() methods, 1513 # fall back on append() if the object has no extend() method 1514 # for backward compatibility. 1515 append = list_obj.append 1516 for item in items: 1517 append(item) 1518 dispatch[APPENDS[0]] = load_appends 1519 1520 def load_setitem(self): 1521 stack = self.stack 1522 value = stack.pop() 1523 key = stack.pop() 1524 dict = stack[-1] 1525 dict[key] = value 1526 dispatch[SETITEM[0]] = load_setitem 1527 1528 def load_setitems(self): 1529 items = self.pop_mark() 1530 dict = self.stack[-1] 1531 for i in range(0, len(items), 2): 1532 dict[items[i]] = items[i + 1] 1533 dispatch[SETITEMS[0]] = load_setitems 1534 1535 def load_additems(self): 1536 items = self.pop_mark() 1537 set_obj = self.stack[-1] 1538 if isinstance(set_obj, set): 1539 set_obj.update(items) 1540 else: 1541 add = set_obj.add 1542 for item in items: 1543 add(item) 1544 dispatch[ADDITEMS[0]] = load_additems 1545 1546 def load_build(self): 1547 stack = self.stack 1548 state = stack.pop() 1549 inst = stack[-1] 1550 setstate = getattr(inst, "__setstate__", None) 1551 if setstate is not None: 1552 setstate(state) 1553 return 1554 slotstate = None 1555 if isinstance(state, tuple) and len(state) == 2: 1556 state, slotstate = state 1557 if state: 1558 inst_dict = inst.__dict__ 1559 intern = sys.intern 1560 for k, v in state.items(): 1561 if type(k) is str: 1562 inst_dict[intern(k)] = v 1563 else: 1564 inst_dict[k] = v 1565 if slotstate: 1566 for k, v in slotstate.items(): 1567 setattr(inst, k, v) 1568 dispatch[BUILD[0]] = load_build 1569 1570 def load_mark(self): 1571 self.metastack.append(self.stack) 1572 self.stack = [] 1573 self.append = self.stack.append 1574 dispatch[MARK[0]] = load_mark 1575 1576 def load_stop(self): 1577 value = self.stack.pop() 1578 raise _Stop(value) 1579 dispatch[STOP[0]] = load_stop 1580 1581 1582# Shorthands 1583 1584def _dump(obj, file, protocol=None, *, fix_imports=True): 1585 _Pickler(file, protocol, fix_imports=fix_imports).dump(obj) 1586 1587def _dumps(obj, protocol=None, *, fix_imports=True): 1588 f = io.BytesIO() 1589 _Pickler(f, protocol, fix_imports=fix_imports).dump(obj) 1590 res = f.getvalue() 1591 assert isinstance(res, bytes_types) 1592 return res 1593 1594def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): 1595 return _Unpickler(file, fix_imports=fix_imports, 1596 encoding=encoding, errors=errors).load() 1597 1598def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): 1599 if isinstance(s, str): 1600 raise TypeError("Can't load pickle from unicode string") 1601 file = io.BytesIO(s) 1602 return _Unpickler(file, fix_imports=fix_imports, 1603 encoding=encoding, errors=errors).load() 1604 1605# Use the faster _pickle if possible 1606try: 1607 from _pickle import ( 1608 PickleError, 1609 PicklingError, 1610 UnpicklingError, 1611 Pickler, 1612 Unpickler, 1613 dump, 1614 dumps, 1615 load, 1616 loads 1617 ) 1618except ImportError: 1619 Pickler, Unpickler = _Pickler, _Unpickler 1620 dump, dumps, load, loads = _dump, _dumps, _load, _loads 1621 1622# Doctest 1623def _test(): 1624 import doctest 1625 return doctest.testmod() 1626 1627if __name__ == "__main__": 1628 import argparse 1629 parser = argparse.ArgumentParser( 1630 description='display contents of the pickle files') 1631 parser.add_argument( 1632 'pickle_file', type=argparse.FileType('br'), 1633 nargs='*', help='the pickle file') 1634 parser.add_argument( 1635 '-t', '--test', action='store_true', 1636 help='run self-test suite') 1637 parser.add_argument( 1638 '-v', action='store_true', 1639 help='run verbosely; only affects self-test run') 1640 args = parser.parse_args() 1641 if args.test: 1642 _test() 1643 else: 1644 if not args.pickle_file: 1645 parser.print_help() 1646 else: 1647 import pprint 1648 for f in args.pickle_file: 1649 obj = load(f) 1650 pprint.pprint(obj) 1651