1 
2 /* Core extension modules are built-in on some platforms (e.g. Windows). */
3 #ifdef Py_BUILD_CORE
4 #define Py_BUILD_CORE_BUILTIN
5 #undef Py_BUILD_CORE
6 #endif
7 
8 #include "Python.h"
9 #include "structmember.h"
10 
11 PyDoc_STRVAR(pickle_module_doc,
12 "Optimized C implementation for the Python pickle module.");
13 
14 /*[clinic input]
15 module _pickle
16 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
17 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
18 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
19 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
20 [clinic start generated code]*/
21 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
22 
/* Pickle protocol version numbers.  HIGHEST_PROTOCOL has to be bumped
   whenever new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 4,
    DEFAULT_PROTOCOL = 3
};
28 
/* Pickle opcodes, one character each.  This list must stay in sync with
   pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    /* Opcodes that precede the protocol 2 additions. */
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Added by protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Added by protocol 3 (Python 3.x). */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',

    /* Added by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8 = '\x8d',
    BINBYTES8 = '\x8e',
    EMPTY_SET = '\x8f',
    ADDITEMS = '\x90',
    FROZENSET = '\x91',
    NEWOBJ_EX = '\x92',
    STACK_GLOBAL = '\x93',
    MEMOIZE = '\x94',
    FRAME = '\x95'
};
104 
enum {
    /* Number of elements batch_list/dict() emits before issuing an
       APPENDS/SETITEMS opcode.  Meant to mirror pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but it is unclear that drifting
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" begins
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size used when unpickling (disabled on unpeekable
       streams). */
    PREFETCH = 8192 * 16,

    /* Framing parameters. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
126 
127 /*************************************************************************/
128 
129 /* State of the pickle module, per PEP 3121. */
130 typedef struct {
131     /* Exception classes for pickle. */
132     PyObject *PickleError;
133     PyObject *PicklingError;
134     PyObject *UnpicklingError;
135 
136     /* copyreg.dispatch_table, {type_object: pickling_function} */
137     PyObject *dispatch_table;
138 
139     /* For the extension opcodes EXT1, EXT2 and EXT4. */
140 
141     /* copyreg._extension_registry, {(module_name, function_name): code} */
142     PyObject *extension_registry;
143     /* copyreg._extension_cache, {code: object} */
144     PyObject *extension_cache;
145     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
146     PyObject *inverted_registry;
147 
148     /* Import mappings for compatibility with Python 2.x */
149 
150     /* _compat_pickle.NAME_MAPPING,
151        {(oldmodule, oldname): (newmodule, newname)} */
152     PyObject *name_mapping_2to3;
153     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
154     PyObject *import_mapping_2to3;
155     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
156     PyObject *name_mapping_3to2;
157     PyObject *import_mapping_3to2;
158 
159     /* codecs.encode, used for saving bytes in older protocols */
160     PyObject *codecs_encode;
161     /* builtins.getattr, used for saving nested names with protocol < 4 */
162     PyObject *getattr;
163     /* functools.partial, used for implementing __newobj_ex__ with protocols
164        2 and 3 */
165     PyObject *partial;
166 } PickleState;
167 
168 /* Forward declaration of the _pickle module definition. */
169 static struct PyModuleDef _picklemodule;
170 
171 /* Given a module object, get its per-module state. */
172 static PickleState *
_Pickle_GetState(PyObject * module)173 _Pickle_GetState(PyObject *module)
174 {
175     return (PickleState *)PyModule_GetState(module);
176 }
177 
178 /* Find the module instance imported in the currently running sub-interpreter
179    and get its state. */
180 static PickleState *
_Pickle_GetGlobalState(void)181 _Pickle_GetGlobalState(void)
182 {
183     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
184 }
185 
186 /* Clear the given pickle module state. */
187 static void
_Pickle_ClearState(PickleState * st)188 _Pickle_ClearState(PickleState *st)
189 {
190     Py_CLEAR(st->PickleError);
191     Py_CLEAR(st->PicklingError);
192     Py_CLEAR(st->UnpicklingError);
193     Py_CLEAR(st->dispatch_table);
194     Py_CLEAR(st->extension_registry);
195     Py_CLEAR(st->extension_cache);
196     Py_CLEAR(st->inverted_registry);
197     Py_CLEAR(st->name_mapping_2to3);
198     Py_CLEAR(st->import_mapping_2to3);
199     Py_CLEAR(st->name_mapping_3to2);
200     Py_CLEAR(st->import_mapping_3to2);
201     Py_CLEAR(st->codecs_encode);
202     Py_CLEAR(st->getattr);
203     Py_CLEAR(st->partial);
204 }
205 
206 /* Initialize the given pickle module state. */
207 static int
_Pickle_InitState(PickleState * st)208 _Pickle_InitState(PickleState *st)
209 {
210     PyObject *copyreg = NULL;
211     PyObject *compat_pickle = NULL;
212     PyObject *codecs = NULL;
213     PyObject *functools = NULL;
214     _Py_IDENTIFIER(getattr);
215 
216     st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
217     if (st->getattr == NULL)
218         goto error;
219 
220     copyreg = PyImport_ImportModule("copyreg");
221     if (!copyreg)
222         goto error;
223     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
224     if (!st->dispatch_table)
225         goto error;
226     if (!PyDict_CheckExact(st->dispatch_table)) {
227         PyErr_Format(PyExc_RuntimeError,
228                      "copyreg.dispatch_table should be a dict, not %.200s",
229                      Py_TYPE(st->dispatch_table)->tp_name);
230         goto error;
231     }
232     st->extension_registry = \
233         PyObject_GetAttrString(copyreg, "_extension_registry");
234     if (!st->extension_registry)
235         goto error;
236     if (!PyDict_CheckExact(st->extension_registry)) {
237         PyErr_Format(PyExc_RuntimeError,
238                      "copyreg._extension_registry should be a dict, "
239                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
240         goto error;
241     }
242     st->inverted_registry = \
243         PyObject_GetAttrString(copyreg, "_inverted_registry");
244     if (!st->inverted_registry)
245         goto error;
246     if (!PyDict_CheckExact(st->inverted_registry)) {
247         PyErr_Format(PyExc_RuntimeError,
248                      "copyreg._inverted_registry should be a dict, "
249                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
250         goto error;
251     }
252     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
253     if (!st->extension_cache)
254         goto error;
255     if (!PyDict_CheckExact(st->extension_cache)) {
256         PyErr_Format(PyExc_RuntimeError,
257                      "copyreg._extension_cache should be a dict, "
258                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
259         goto error;
260     }
261     Py_CLEAR(copyreg);
262 
263     /* Load the 2.x -> 3.x stdlib module mapping tables */
264     compat_pickle = PyImport_ImportModule("_compat_pickle");
265     if (!compat_pickle)
266         goto error;
267     st->name_mapping_2to3 = \
268         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
269     if (!st->name_mapping_2to3)
270         goto error;
271     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
272         PyErr_Format(PyExc_RuntimeError,
273                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
274                      Py_TYPE(st->name_mapping_2to3)->tp_name);
275         goto error;
276     }
277     st->import_mapping_2to3 = \
278         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
279     if (!st->import_mapping_2to3)
280         goto error;
281     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
282         PyErr_Format(PyExc_RuntimeError,
283                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
284                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
285         goto error;
286     }
287     /* ... and the 3.x -> 2.x mapping tables */
288     st->name_mapping_3to2 = \
289         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
290     if (!st->name_mapping_3to2)
291         goto error;
292     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
293         PyErr_Format(PyExc_RuntimeError,
294                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
295                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
296         goto error;
297     }
298     st->import_mapping_3to2 = \
299         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
300     if (!st->import_mapping_3to2)
301         goto error;
302     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
303         PyErr_Format(PyExc_RuntimeError,
304                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
305                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
306         goto error;
307     }
308     Py_CLEAR(compat_pickle);
309 
310     codecs = PyImport_ImportModule("codecs");
311     if (codecs == NULL)
312         goto error;
313     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
314     if (st->codecs_encode == NULL) {
315         goto error;
316     }
317     if (!PyCallable_Check(st->codecs_encode)) {
318         PyErr_Format(PyExc_RuntimeError,
319                      "codecs.encode should be a callable, not %.200s",
320                      Py_TYPE(st->codecs_encode)->tp_name);
321         goto error;
322     }
323     Py_CLEAR(codecs);
324 
325     functools = PyImport_ImportModule("functools");
326     if (!functools)
327         goto error;
328     st->partial = PyObject_GetAttrString(functools, "partial");
329     if (!st->partial)
330         goto error;
331     Py_CLEAR(functools);
332 
333     return 0;
334 
335   error:
336     Py_CLEAR(copyreg);
337     Py_CLEAR(compat_pickle);
338     Py_CLEAR(codecs);
339     Py_CLEAR(functools);
340     _Pickle_ClearState(st);
341     return -1;
342 }
343 
344 /* Helper for calling a function with a single argument quickly.
345 
346    This function steals the reference of the given argument. */
347 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)348 _Pickle_FastCall(PyObject *func, PyObject *obj)
349 {
350     PyObject *result;
351 
352     result = PyObject_CallFunctionObjArgs(func, obj, NULL);
353     Py_DECREF(obj);
354     return result;
355 }
356 
357 /*************************************************************************/
358 
359 /* Retrieve and deconstruct a method for avoiding a reference cycle
360    (pickler -> bound method of pickler -> pickler) */
361 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)362 init_method_ref(PyObject *self, _Py_Identifier *name,
363                 PyObject **method_func, PyObject **method_self)
364 {
365     PyObject *func, *func2;
366     int ret;
367 
368     /* *method_func and *method_self should be consistent.  All refcount decrements
369        should be occurred after setting *method_self and *method_func. */
370     ret = _PyObject_LookupAttrId(self, name, &func);
371     if (func == NULL) {
372         *method_self = NULL;
373         Py_CLEAR(*method_func);
374         return ret;
375     }
376 
377     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
378         /* Deconstruct a bound Python method */
379         func2 = PyMethod_GET_FUNCTION(func);
380         Py_INCREF(func2);
381         *method_self = self; /* borrowed */
382         Py_XSETREF(*method_func, func2);
383         Py_DECREF(func);
384         return 0;
385     }
386     else {
387         *method_self = NULL;
388         Py_XSETREF(*method_func, func);
389         return 0;
390     }
391 }
392 
393 /* Bind a method if it was deconstructed */
394 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)395 reconstruct_method(PyObject *func, PyObject *self)
396 {
397     if (self) {
398         return PyMethod_New(func, self);
399     }
400     else {
401         Py_INCREF(func);
402         return func;
403     }
404 }
405 
406 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)407 call_method(PyObject *func, PyObject *self, PyObject *obj)
408 {
409     if (self) {
410         return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
411     }
412     else {
413         return PyObject_CallFunctionObjArgs(func, obj, NULL);
414     }
415 }
416 
417 /*************************************************************************/
418 
419 /* Internal data type used as the unpickling stack. */
420 typedef struct {
421     PyObject_VAR_HEAD
422     PyObject **data;
423     int mark_set;          /* is MARK set? */
424     Py_ssize_t fence;      /* position of top MARK or 0 */
425     Py_ssize_t allocated;  /* number of slots in data allocated */
426 } Pdata;
427 
428 static void
Pdata_dealloc(Pdata * self)429 Pdata_dealloc(Pdata *self)
430 {
431     Py_ssize_t i = Py_SIZE(self);
432     while (--i >= 0) {
433         Py_DECREF(self->data[i]);
434     }
435     PyMem_FREE(self->data);
436     PyObject_Del(self);
437 }
438 
439 static PyTypeObject Pdata_Type = {
440     PyVarObject_HEAD_INIT(NULL, 0)
441     "_pickle.Pdata",              /*tp_name*/
442     sizeof(Pdata),                /*tp_basicsize*/
443     sizeof(PyObject *),           /*tp_itemsize*/
444     (destructor)Pdata_dealloc,    /*tp_dealloc*/
445 };
446 
447 static PyObject *
Pdata_New(void)448 Pdata_New(void)
449 {
450     Pdata *self;
451 
452     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
453         return NULL;
454     Py_SIZE(self) = 0;
455     self->mark_set = 0;
456     self->fence = 0;
457     self->allocated = 8;
458     self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
459     if (self->data)
460         return (PyObject *)self;
461     Py_DECREF(self);
462     return PyErr_NoMemory();
463 }
464 
465 
466 /* Retain only the initial clearto items.  If clearto >= the current
467  * number of items, this is a (non-erroneous) NOP.
468  */
469 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)470 Pdata_clear(Pdata *self, Py_ssize_t clearto)
471 {
472     Py_ssize_t i = Py_SIZE(self);
473 
474     assert(clearto >= self->fence);
475     if (clearto >= i)
476         return 0;
477 
478     while (--i >= clearto) {
479         Py_CLEAR(self->data[i]);
480     }
481     Py_SIZE(self) = clearto;
482     return 0;
483 }
484 
485 static int
Pdata_grow(Pdata * self)486 Pdata_grow(Pdata *self)
487 {
488     PyObject **data = self->data;
489     size_t allocated = (size_t)self->allocated;
490     size_t new_allocated;
491 
492     new_allocated = (allocated >> 3) + 6;
493     /* check for integer overflow */
494     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
495         goto nomemory;
496     new_allocated += allocated;
497     PyMem_RESIZE(data, PyObject *, new_allocated);
498     if (data == NULL)
499         goto nomemory;
500 
501     self->data = data;
502     self->allocated = (Py_ssize_t)new_allocated;
503     return 0;
504 
505   nomemory:
506     PyErr_NoMemory();
507     return -1;
508 }
509 
510 static int
Pdata_stack_underflow(Pdata * self)511 Pdata_stack_underflow(Pdata *self)
512 {
513     PickleState *st = _Pickle_GetGlobalState();
514     PyErr_SetString(st->UnpicklingError,
515                     self->mark_set ?
516                     "unexpected MARK found" :
517                     "unpickling stack underflow");
518     return -1;
519 }
520 
521 /* D is a Pdata*.  Pop the topmost element and store it into V, which
522  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
523  * is raised and V is set to NULL.
524  */
525 static PyObject *
Pdata_pop(Pdata * self)526 Pdata_pop(Pdata *self)
527 {
528     if (Py_SIZE(self) <= self->fence) {
529         Pdata_stack_underflow(self);
530         return NULL;
531     }
532     return self->data[--Py_SIZE(self)];
533 }
534 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
535 
536 static int
Pdata_push(Pdata * self,PyObject * obj)537 Pdata_push(Pdata *self, PyObject *obj)
538 {
539     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
540         return -1;
541     }
542     self->data[Py_SIZE(self)++] = obj;
543     return 0;
544 }
545 
546 /* Push an object on stack, transferring its ownership to the stack. */
547 #define PDATA_PUSH(D, O, ER) do {                               \
548         if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
549 
550 /* Push an object on stack, adding a new reference to the object. */
551 #define PDATA_APPEND(D, O, ER) do {                             \
552         Py_INCREF((O));                                         \
553         if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
554 
555 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)556 Pdata_poptuple(Pdata *self, Py_ssize_t start)
557 {
558     PyObject *tuple;
559     Py_ssize_t len, i, j;
560 
561     if (start < self->fence) {
562         Pdata_stack_underflow(self);
563         return NULL;
564     }
565     len = Py_SIZE(self) - start;
566     tuple = PyTuple_New(len);
567     if (tuple == NULL)
568         return NULL;
569     for (i = start, j = 0; j < len; i++, j++)
570         PyTuple_SET_ITEM(tuple, j, self->data[i]);
571 
572     Py_SIZE(self) = start;
573     return tuple;
574 }
575 
576 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)577 Pdata_poplist(Pdata *self, Py_ssize_t start)
578 {
579     PyObject *list;
580     Py_ssize_t len, i, j;
581 
582     len = Py_SIZE(self) - start;
583     list = PyList_New(len);
584     if (list == NULL)
585         return NULL;
586     for (i = start, j = 0; j < len; i++, j++)
587         PyList_SET_ITEM(list, j, self->data[i]);
588 
589     Py_SIZE(self) = start;
590     return list;
591 }
592 
593 typedef struct {
594     PyObject *me_key;
595     Py_ssize_t me_value;
596 } PyMemoEntry;
597 
598 typedef struct {
599     size_t mt_mask;
600     size_t mt_used;
601     size_t mt_allocated;
602     PyMemoEntry *mt_table;
603 } PyMemoTable;
604 
605 typedef struct PicklerObject {
606     PyObject_HEAD
607     PyMemoTable *memo;          /* Memo table, keep track of the seen
608                                    objects to support self-referential objects
609                                    pickling. */
610     PyObject *pers_func;        /* persistent_id() method, can be NULL */
611     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
612                                    is an unbound method, NULL otherwise */
613     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
614 
615     PyObject *write;            /* write() method of the output stream. */
616     PyObject *output_buffer;    /* Write into a local bytearray buffer before
617                                    flushing to the stream. */
618     Py_ssize_t output_len;      /* Length of output_buffer. */
619     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
620     int proto;                  /* Pickle protocol number, >= 0 */
621     int bin;                    /* Boolean, true if proto > 0 */
622     int framing;                /* True when framing is enabled, proto >= 4 */
623     Py_ssize_t frame_start;     /* Position in output_buffer where the
624                                    current frame begins. -1 if there
625                                    is no frame currently open. */
626 
627     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
628     int fast;                   /* Enable fast mode if set to a true value.
629                                    The fast mode disable the usage of memo,
630                                    therefore speeding the pickling process by
631                                    not generating superfluous PUT opcodes. It
632                                    should not be used if with self-referential
633                                    objects. */
634     int fast_nesting;
635     int fix_imports;            /* Indicate whether Pickler should fix
636                                    the name of globals for Python 2.x. */
637     PyObject *fast_memo;
638 } PicklerObject;
639 
640 typedef struct UnpicklerObject {
641     PyObject_HEAD
642     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
643 
644     /* The unpickler memo is just an array of PyObject *s. Using a dict
645        is unnecessary, since the keys are contiguous ints. */
646     PyObject **memo;
647     size_t memo_size;       /* Capacity of the memo array */
648     size_t memo_len;        /* Number of objects in the memo */
649 
650     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
651     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
652                                    is an unbound method, NULL otherwise */
653 
654     Py_buffer buffer;
655     char *input_buffer;
656     char *input_line;
657     Py_ssize_t input_len;
658     Py_ssize_t next_read_idx;
659     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
660 
661     PyObject *read;             /* read() method of the input stream. */
662     PyObject *readline;         /* readline() method of the input stream. */
663     PyObject *peek;             /* peek() method of the input stream, or NULL */
664 
665     char *encoding;             /* Name of the encoding to be used for
666                                    decoding strings pickled using Python
667                                    2.x. The default value is "ASCII" */
668     char *errors;               /* Name of errors handling scheme to used when
669                                    decoding strings. The default value is
670                                    "strict". */
671     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
672                                    objects. */
673     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
674     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
675     int proto;                  /* Protocol of the pickle loaded. */
676     int fix_imports;            /* Indicate whether Unpickler should fix
677                                    the name of globals pickled by Python 2.x. */
678 } UnpicklerObject;
679 
680 typedef struct {
681     PyObject_HEAD
682     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
683 }  PicklerMemoProxyObject;
684 
685 typedef struct {
686     PyObject_HEAD
687     UnpicklerObject *unpickler;
688 } UnpicklerMemoProxyObject;
689 
690 /* Forward declarations */
691 static int save(PicklerObject *, PyObject *, int);
692 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
693 static PyTypeObject Pickler_Type;
694 static PyTypeObject Unpickler_Type;
695 
696 #include "clinic/_pickle.c.h"
697 
698 /*************************************************************************
699  A custom hashtable mapping void* to Python ints. This is used by the pickler
700  for memoization. Using a custom hashtable rather than PyDict allows us to skip
701  a bunch of unnecessary object creation. This makes a huge performance
702  difference. */
703 
704 #define MT_MINSIZE 8
705 #define PERTURB_SHIFT 5
706 
707 
708 static PyMemoTable *
PyMemoTable_New(void)709 PyMemoTable_New(void)
710 {
711     PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
712     if (memo == NULL) {
713         PyErr_NoMemory();
714         return NULL;
715     }
716 
717     memo->mt_used = 0;
718     memo->mt_allocated = MT_MINSIZE;
719     memo->mt_mask = MT_MINSIZE - 1;
720     memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
721     if (memo->mt_table == NULL) {
722         PyMem_FREE(memo);
723         PyErr_NoMemory();
724         return NULL;
725     }
726     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
727 
728     return memo;
729 }
730 
731 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)732 PyMemoTable_Copy(PyMemoTable *self)
733 {
734     PyMemoTable *new = PyMemoTable_New();
735     if (new == NULL)
736         return NULL;
737 
738     new->mt_used = self->mt_used;
739     new->mt_allocated = self->mt_allocated;
740     new->mt_mask = self->mt_mask;
741     /* The table we get from _New() is probably smaller than we wanted.
742        Free it and allocate one that's the right size. */
743     PyMem_FREE(new->mt_table);
744     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
745     if (new->mt_table == NULL) {
746         PyMem_FREE(new);
747         PyErr_NoMemory();
748         return NULL;
749     }
750     for (size_t i = 0; i < self->mt_allocated; i++) {
751         Py_XINCREF(self->mt_table[i].me_key);
752     }
753     memcpy(new->mt_table, self->mt_table,
754            sizeof(PyMemoEntry) * self->mt_allocated);
755 
756     return new;
757 }
758 
759 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)760 PyMemoTable_Size(PyMemoTable *self)
761 {
762     return self->mt_used;
763 }
764 
765 static int
PyMemoTable_Clear(PyMemoTable * self)766 PyMemoTable_Clear(PyMemoTable *self)
767 {
768     Py_ssize_t i = self->mt_allocated;
769 
770     while (--i >= 0) {
771         Py_XDECREF(self->mt_table[i].me_key);
772     }
773     self->mt_used = 0;
774     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
775     return 0;
776 }
777 
778 static void
PyMemoTable_Del(PyMemoTable * self)779 PyMemoTable_Del(PyMemoTable *self)
780 {
781     if (self == NULL)
782         return;
783     PyMemoTable_Clear(self);
784 
785     PyMem_FREE(self->mt_table);
786     PyMem_FREE(self);
787 }
788 
789 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
790    can be considerably simpler than dictobject.c's lookdict(). */
791 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)792 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
793 {
794     size_t i;
795     size_t perturb;
796     size_t mask = self->mt_mask;
797     PyMemoEntry *table = self->mt_table;
798     PyMemoEntry *entry;
799     Py_hash_t hash = (Py_hash_t)key >> 3;
800 
801     i = hash & mask;
802     entry = &table[i];
803     if (entry->me_key == NULL || entry->me_key == key)
804         return entry;
805 
806     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
807         i = (i << 2) + i + perturb + 1;
808         entry = &table[i & mask];
809         if (entry->me_key == NULL || entry->me_key == key)
810             return entry;
811     }
812     Py_UNREACHABLE();
813 }
814 
815 /* Returns -1 on failure, 0 on success. */
816 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)817 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
818 {
819     PyMemoEntry *oldtable = NULL;
820     PyMemoEntry *oldentry, *newentry;
821     size_t new_size = MT_MINSIZE;
822     size_t to_process;
823 
824     assert(min_size > 0);
825 
826     if (min_size > PY_SSIZE_T_MAX) {
827         PyErr_NoMemory();
828         return -1;
829     }
830 
831     /* Find the smallest valid table size >= min_size. */
832     while (new_size < min_size) {
833         new_size <<= 1;
834     }
835     /* new_size needs to be a power of two. */
836     assert((new_size & (new_size - 1)) == 0);
837 
838     /* Allocate new table. */
839     oldtable = self->mt_table;
840     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
841     if (self->mt_table == NULL) {
842         self->mt_table = oldtable;
843         PyErr_NoMemory();
844         return -1;
845     }
846     self->mt_allocated = new_size;
847     self->mt_mask = new_size - 1;
848     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
849 
850     /* Copy entries from the old table. */
851     to_process = self->mt_used;
852     for (oldentry = oldtable; to_process > 0; oldentry++) {
853         if (oldentry->me_key != NULL) {
854             to_process--;
855             /* newentry is a pointer to a chunk of the new
856                mt_table, so we're setting the key:value pair
857                in-place. */
858             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
859             newentry->me_key = oldentry->me_key;
860             newentry->me_value = oldentry->me_value;
861         }
862     }
863 
864     /* Deallocate the old table. */
865     PyMem_FREE(oldtable);
866     return 0;
867 }
868 
869 /* Returns NULL on failure, a pointer to the value otherwise. */
870 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)871 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
872 {
873     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
874     if (entry->me_key == NULL)
875         return NULL;
876     return &entry->me_value;
877 }
878 
879 /* Returns -1 on failure, 0 on success. */
880 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)881 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
882 {
883     PyMemoEntry *entry;
884 
885     assert(key != NULL);
886 
887     entry = _PyMemoTable_Lookup(self, key);
888     if (entry->me_key != NULL) {
889         entry->me_value = value;
890         return 0;
891     }
892     Py_INCREF(key);
893     entry->me_key = key;
894     entry->me_value = value;
895     self->mt_used++;
896 
897     /* If we added a key, we can safely resize. Otherwise just return!
898      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
899      *
900      * Quadrupling the size improves average table sparseness
901      * (reducing collisions) at the cost of some memory. It also halves
902      * the number of expensive resize operations in a growing memo table.
903      *
904      * Very large memo tables (over 50K items) use doubling instead.
905      * This may help applications with severe memory constraints.
906      */
907     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
908         return 0;
909     }
910     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
911     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
912     return _PyMemoTable_ResizeTable(self, desired_size);
913 }
914 
915 #undef MT_MINSIZE
916 #undef PERTURB_SHIFT
917 
918 /*************************************************************************/
919 
920 
921 static int
_Pickler_ClearBuffer(PicklerObject * self)922 _Pickler_ClearBuffer(PicklerObject *self)
923 {
924     Py_XSETREF(self->output_buffer,
925               PyBytes_FromStringAndSize(NULL, self->max_output_len));
926     if (self->output_buffer == NULL)
927         return -1;
928     self->output_len = 0;
929     self->frame_start = -1;
930     return 0;
931 }
932 
/* Store `value` at `out` as an 8-byte little-endian integer. When size_t is
   narrower than 8 bytes, the remaining high-order bytes are written as 0.
   `out` must have room for 8 bytes. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    for (pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        }
        else {
            /* Past the width of size_t: pad instead of shifting. */
            out[pos] = 0;
        }
    }
}
947 
948 static int
_Pickler_CommitFrame(PicklerObject * self)949 _Pickler_CommitFrame(PicklerObject *self)
950 {
951     size_t frame_len;
952     char *qdata;
953 
954     if (!self->framing || self->frame_start == -1)
955         return 0;
956     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
957     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
958     if (frame_len >= FRAME_SIZE_MIN) {
959         qdata[0] = FRAME;
960         _write_size64(qdata + 1, frame_len);
961     }
962     else {
963         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
964         self->output_len -= FRAME_HEADER_SIZE;
965     }
966     self->frame_start = -1;
967     return 0;
968 }
969 
970 static PyObject *
_Pickler_GetString(PicklerObject * self)971 _Pickler_GetString(PicklerObject *self)
972 {
973     PyObject *output_buffer = self->output_buffer;
974 
975     assert(self->output_buffer != NULL);
976 
977     if (_Pickler_CommitFrame(self))
978         return NULL;
979 
980     self->output_buffer = NULL;
981     /* Resize down to exact size */
982     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
983         return NULL;
984     return output_buffer;
985 }
986 
987 static int
_Pickler_FlushToFile(PicklerObject * self)988 _Pickler_FlushToFile(PicklerObject *self)
989 {
990     PyObject *output, *result;
991 
992     assert(self->write != NULL);
993 
994     /* This will commit the frame first */
995     output = _Pickler_GetString(self);
996     if (output == NULL)
997         return -1;
998 
999     result = _Pickle_FastCall(self->write, output);
1000     Py_XDECREF(result);
1001     return (result == NULL) ? -1 : 0;
1002 }
1003 
1004 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1005 _Pickler_OpcodeBoundary(PicklerObject *self)
1006 {
1007     Py_ssize_t frame_len;
1008 
1009     if (!self->framing || self->frame_start == -1) {
1010         return 0;
1011     }
1012     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1013     if (frame_len >= FRAME_SIZE_TARGET) {
1014         if(_Pickler_CommitFrame(self)) {
1015             return -1;
1016         }
1017         /* Flush the content of the committed frame to the underlying
1018          * file and reuse the pickler buffer for the next frame so as
1019          * to limit memory usage when dumping large complex objects to
1020          * a file.
1021          *
1022          * self->write is NULL when called via dumps.
1023          */
1024         if (self->write != NULL) {
1025             if (_Pickler_FlushToFile(self) < 0) {
1026                 return -1;
1027             }
1028             if (_Pickler_ClearBuffer(self) < 0) {
1029                 return -1;
1030             }
1031         }
1032     }
1033     return 0;
1034 }
1035 
1036 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1037 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1038 {
1039     Py_ssize_t i, n, required;
1040     char *buffer;
1041     int need_new_frame;
1042 
1043     assert(s != NULL);
1044     need_new_frame = (self->framing && self->frame_start == -1);
1045 
1046     if (need_new_frame)
1047         n = data_len + FRAME_HEADER_SIZE;
1048     else
1049         n = data_len;
1050 
1051     required = self->output_len + n;
1052     if (required > self->max_output_len) {
1053         /* Make place in buffer for the pickle chunk */
1054         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1055             PyErr_NoMemory();
1056             return -1;
1057         }
1058         self->max_output_len = (self->output_len + n) / 2 * 3;
1059         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1060             return -1;
1061     }
1062     buffer = PyBytes_AS_STRING(self->output_buffer);
1063     if (need_new_frame) {
1064         /* Setup new frame */
1065         Py_ssize_t frame_start = self->output_len;
1066         self->frame_start = frame_start;
1067         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1068             /* Write an invalid value, for debugging */
1069             buffer[frame_start + i] = 0xFE;
1070         }
1071         self->output_len += FRAME_HEADER_SIZE;
1072     }
1073     if (data_len < 8) {
1074         /* This is faster than memcpy when the string is short. */
1075         for (i = 0; i < data_len; i++) {
1076             buffer[self->output_len + i] = s[i];
1077         }
1078     }
1079     else {
1080         memcpy(buffer + self->output_len, s, data_len);
1081     }
1082     self->output_len += data_len;
1083     return data_len;
1084 }
1085 
1086 static PicklerObject *
_Pickler_New(void)1087 _Pickler_New(void)
1088 {
1089     PicklerObject *self;
1090 
1091     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1092     if (self == NULL)
1093         return NULL;
1094 
1095     self->pers_func = NULL;
1096     self->dispatch_table = NULL;
1097     self->write = NULL;
1098     self->proto = 0;
1099     self->bin = 0;
1100     self->framing = 0;
1101     self->frame_start = -1;
1102     self->fast = 0;
1103     self->fast_nesting = 0;
1104     self->fix_imports = 0;
1105     self->fast_memo = NULL;
1106     self->max_output_len = WRITE_BUF_SIZE;
1107     self->output_len = 0;
1108 
1109     self->memo = PyMemoTable_New();
1110     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1111                                                     self->max_output_len);
1112 
1113     if (self->memo == NULL || self->output_buffer == NULL) {
1114         Py_DECREF(self);
1115         return NULL;
1116     }
1117 
1118     PyObject_GC_Track(self);
1119     return self;
1120 }
1121 
1122 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1123 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1124 {
1125     long proto;
1126 
1127     if (protocol == NULL || protocol == Py_None) {
1128         proto = DEFAULT_PROTOCOL;
1129     }
1130     else {
1131         proto = PyLong_AsLong(protocol);
1132         if (proto < 0) {
1133             if (proto == -1 && PyErr_Occurred())
1134                 return -1;
1135             proto = HIGHEST_PROTOCOL;
1136         }
1137         else if (proto > HIGHEST_PROTOCOL) {
1138             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1139                          HIGHEST_PROTOCOL);
1140             return -1;
1141         }
1142     }
1143     self->proto = (int)proto;
1144     self->bin = proto > 0;
1145     self->fix_imports = fix_imports && proto < 3;
1146     return 0;
1147 }
1148 
1149 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1150    be called once on a freshly created Pickler. */
1151 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1152 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1153 {
1154     _Py_IDENTIFIER(write);
1155     assert(file != NULL);
1156     if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1157         return -1;
1158     }
1159     if (self->write == NULL) {
1160         PyErr_SetString(PyExc_TypeError,
1161                         "file must have a 'write' attribute");
1162         return -1;
1163     }
1164 
1165     return 0;
1166 }
1167 
1168 /* Returns the size of the input on success, -1 on failure. This takes its
1169    own reference to `input`. */
1170 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1171 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1172 {
1173     if (self->buffer.buf != NULL)
1174         PyBuffer_Release(&self->buffer);
1175     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1176         return -1;
1177     self->input_buffer = self->buffer.buf;
1178     self->input_len = self->buffer.len;
1179     self->next_read_idx = 0;
1180     self->prefetched_idx = self->input_len;
1181     return self->input_len;
1182 }
1183 
1184 static int
bad_readline(void)1185 bad_readline(void)
1186 {
1187     PickleState *st = _Pickle_GetGlobalState();
1188     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1189     return -1;
1190 }
1191 
1192 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1193 _Unpickler_SkipConsumed(UnpicklerObject *self)
1194 {
1195     Py_ssize_t consumed;
1196     PyObject *r;
1197 
1198     consumed = self->next_read_idx - self->prefetched_idx;
1199     if (consumed <= 0)
1200         return 0;
1201 
1202     assert(self->peek);  /* otherwise we did something wrong */
1203     /* This makes a useless copy... */
1204     r = PyObject_CallFunction(self->read, "n", consumed);
1205     if (r == NULL)
1206         return -1;
1207     Py_DECREF(r);
1208 
1209     self->prefetched_idx = self->next_read_idx;
1210     return 0;
1211 }
1212 
1213 static const Py_ssize_t READ_WHOLE_LINE = -1;
1214 
1215 /* If reading from a file, we need to only pull the bytes we need, since there
1216    may be multiple pickle objects arranged contiguously in the same input
1217    buffer.
1218 
1219    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1220    bytes from the input stream/buffer.
1221 
1222    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1223    failure; on success, returns the number of bytes read from the file.
1224 
1225    On success, self->input_len will be 0; this is intentional so that when
1226    unpickling from a file, the "we've run out of data" code paths will trigger,
1227    causing the Unpickler to go back to the file for more data. Use the returned
1228    size to tell you how much data you can process. */
1229 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1230 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1231 {
1232     PyObject *data;
1233     Py_ssize_t read_size;
1234 
1235     assert(self->read != NULL);
1236 
1237     if (_Unpickler_SkipConsumed(self) < 0)
1238         return -1;
1239 
1240     if (n == READ_WHOLE_LINE) {
1241         data = _PyObject_CallNoArg(self->readline);
1242     }
1243     else {
1244         PyObject *len;
1245         /* Prefetch some data without advancing the file pointer, if possible */
1246         if (self->peek && n < PREFETCH) {
1247             len = PyLong_FromSsize_t(PREFETCH);
1248             if (len == NULL)
1249                 return -1;
1250             data = _Pickle_FastCall(self->peek, len);
1251             if (data == NULL) {
1252                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1253                     return -1;
1254                 /* peek() is probably not supported by the given file object */
1255                 PyErr_Clear();
1256                 Py_CLEAR(self->peek);
1257             }
1258             else {
1259                 read_size = _Unpickler_SetStringInput(self, data);
1260                 Py_DECREF(data);
1261                 self->prefetched_idx = 0;
1262                 if (n <= read_size)
1263                     return n;
1264             }
1265         }
1266         len = PyLong_FromSsize_t(n);
1267         if (len == NULL)
1268             return -1;
1269         data = _Pickle_FastCall(self->read, len);
1270     }
1271     if (data == NULL)
1272         return -1;
1273 
1274     read_size = _Unpickler_SetStringInput(self, data);
1275     Py_DECREF(data);
1276     return read_size;
1277 }
1278 
1279 /* Don't call it directly: use _Unpickler_Read() */
1280 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1281 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1282 {
1283     Py_ssize_t num_read;
1284 
1285     *s = NULL;
1286     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1287         PickleState *st = _Pickle_GetGlobalState();
1288         PyErr_SetString(st->UnpicklingError,
1289                         "read would overflow (invalid bytecode)");
1290         return -1;
1291     }
1292 
1293     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1294     assert(self->next_read_idx + n > self->input_len);
1295 
1296     if (!self->read)
1297         return bad_readline();
1298 
1299     num_read = _Unpickler_ReadFromFile(self, n);
1300     if (num_read < 0)
1301         return -1;
1302     if (num_read < n)
1303         return bad_readline();
1304     *s = self->input_buffer;
1305     self->next_read_idx = n;
1306     return n;
1307 }
1308 
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when the input buffer still holds `n` bytes past
   self->next_read_idx, `*s` is pointed at them and next_read_idx is advanced
   by `n`. Otherwise the request is passed on to _Unpickler_ReadImpl().

   Note that self->next_read_idx is an offset into the current input buffer.
   When reading from a file-like object that buffer is replaced on each
   refill, so next_read_idx is not an absolute position in the stream.

   This is a macro: `self`, `s` and `n` may be evaluated more than once, so
   pass expressions without side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1328 
1329 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1330 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1331                     char **result)
1332 {
1333     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1334     if (input_line == NULL) {
1335         PyErr_NoMemory();
1336         return -1;
1337     }
1338 
1339     memcpy(input_line, line, len);
1340     input_line[len] = '\0';
1341     self->input_line = input_line;
1342     *result = self->input_line;
1343     return len;
1344 }
1345 
1346 /* Read a line from the input stream/buffer. If we run off the end of the input
1347    before hitting \n, raise an error.
1348 
1349    Returns the number of chars read, or -1 on failure. */
1350 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1351 _Unpickler_Readline(UnpicklerObject *self, char **result)
1352 {
1353     Py_ssize_t i, num_read;
1354 
1355     for (i = self->next_read_idx; i < self->input_len; i++) {
1356         if (self->input_buffer[i] == '\n') {
1357             char *line_start = self->input_buffer + self->next_read_idx;
1358             num_read = i - self->next_read_idx + 1;
1359             self->next_read_idx = i + 1;
1360             return _Unpickler_CopyLine(self, line_start, num_read, result);
1361         }
1362     }
1363     if (!self->read)
1364         return bad_readline();
1365 
1366     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1367     if (num_read < 0)
1368         return -1;
1369     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1370         return bad_readline();
1371     self->next_read_idx = num_read;
1372     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1373 }
1374 
1375 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1376    will be modified in place. */
1377 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1378 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1379 {
1380     size_t i;
1381 
1382     assert(new_size > self->memo_size);
1383 
1384     PyObject **memo_new = self->memo;
1385     PyMem_RESIZE(memo_new, PyObject *, new_size);
1386     if (memo_new == NULL) {
1387         PyErr_NoMemory();
1388         return -1;
1389     }
1390     self->memo = memo_new;
1391     for (i = self->memo_size; i < new_size; i++)
1392         self->memo[i] = NULL;
1393     self->memo_size = new_size;
1394     return 0;
1395 }
1396 
1397 /* Returns NULL if idx is out of bounds. */
1398 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1399 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1400 {
1401     if (idx >= self->memo_size)
1402         return NULL;
1403 
1404     return self->memo[idx];
1405 }
1406 
1407 /* Returns -1 (with an exception set) on failure, 0 on success.
1408    This takes its own reference to `value`. */
1409 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1410 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1411 {
1412     PyObject *old_item;
1413 
1414     if (idx >= self->memo_size) {
1415         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1416             return -1;
1417         assert(idx < self->memo_size);
1418     }
1419     Py_INCREF(value);
1420     old_item = self->memo[idx];
1421     self->memo[idx] = value;
1422     if (old_item != NULL) {
1423         Py_DECREF(old_item);
1424     }
1425     else {
1426         self->memo_len++;
1427     }
1428     return 0;
1429 }
1430 
1431 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1432 _Unpickler_NewMemo(Py_ssize_t new_size)
1433 {
1434     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1435     if (memo == NULL) {
1436         PyErr_NoMemory();
1437         return NULL;
1438     }
1439     memset(memo, 0, new_size * sizeof(PyObject *));
1440     return memo;
1441 }
1442 
1443 /* Free the unpickler's memo, taking care to decref any items left in it. */
1444 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1445 _Unpickler_MemoCleanup(UnpicklerObject *self)
1446 {
1447     Py_ssize_t i;
1448     PyObject **memo = self->memo;
1449 
1450     if (self->memo == NULL)
1451         return;
1452     self->memo = NULL;
1453     i = self->memo_size;
1454     while (--i >= 0) {
1455         Py_XDECREF(memo[i]);
1456     }
1457     PyMem_FREE(memo);
1458 }
1459 
1460 static UnpicklerObject *
_Unpickler_New(void)1461 _Unpickler_New(void)
1462 {
1463     UnpicklerObject *self;
1464 
1465     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1466     if (self == NULL)
1467         return NULL;
1468 
1469     self->pers_func = NULL;
1470     self->input_buffer = NULL;
1471     self->input_line = NULL;
1472     self->input_len = 0;
1473     self->next_read_idx = 0;
1474     self->prefetched_idx = 0;
1475     self->read = NULL;
1476     self->readline = NULL;
1477     self->peek = NULL;
1478     self->encoding = NULL;
1479     self->errors = NULL;
1480     self->marks = NULL;
1481     self->num_marks = 0;
1482     self->marks_size = 0;
1483     self->proto = 0;
1484     self->fix_imports = 0;
1485     memset(&self->buffer, 0, sizeof(Py_buffer));
1486     self->memo_size = 32;
1487     self->memo_len = 0;
1488     self->memo = _Unpickler_NewMemo(self->memo_size);
1489     self->stack = (Pdata *)Pdata_New();
1490 
1491     if (self->memo == NULL || self->stack == NULL) {
1492         Py_DECREF(self);
1493         return NULL;
1494     }
1495 
1496     PyObject_GC_Track(self);
1497     return self;
1498 }
1499 
1500 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1501    be called once on a freshly created Pickler. */
1502 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1503 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1504 {
1505     _Py_IDENTIFIER(peek);
1506     _Py_IDENTIFIER(read);
1507     _Py_IDENTIFIER(readline);
1508 
1509     if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1510         return -1;
1511     }
1512     (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1513     (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1514     if (self->readline == NULL || self->read == NULL) {
1515         if (!PyErr_Occurred()) {
1516             PyErr_SetString(PyExc_TypeError,
1517                             "file must have 'read' and 'readline' attributes");
1518         }
1519         Py_CLEAR(self->read);
1520         Py_CLEAR(self->readline);
1521         Py_CLEAR(self->peek);
1522         return -1;
1523     }
1524     return 0;
1525 }
1526 
1527 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1528    be called once on a freshly created Pickler. */
1529 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1530 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1531                             const char *encoding,
1532                             const char *errors)
1533 {
1534     if (encoding == NULL)
1535         encoding = "ASCII";
1536     if (errors == NULL)
1537         errors = "strict";
1538 
1539     self->encoding = _PyMem_Strdup(encoding);
1540     self->errors = _PyMem_Strdup(errors);
1541     if (self->encoding == NULL || self->errors == NULL) {
1542         PyErr_NoMemory();
1543         return -1;
1544     }
1545     return 0;
1546 }
1547 
1548 /* Generate a GET opcode for an object stored in the memo. */
1549 static int
memo_get(PicklerObject * self,PyObject * key)1550 memo_get(PicklerObject *self, PyObject *key)
1551 {
1552     Py_ssize_t *value;
1553     char pdata[30];
1554     Py_ssize_t len;
1555 
1556     value = PyMemoTable_Get(self->memo, key);
1557     if (value == NULL)  {
1558         PyErr_SetObject(PyExc_KeyError, key);
1559         return -1;
1560     }
1561 
1562     if (!self->bin) {
1563         pdata[0] = GET;
1564         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1565                       "%" PY_FORMAT_SIZE_T "d\n", *value);
1566         len = strlen(pdata);
1567     }
1568     else {
1569         if (*value < 256) {
1570             pdata[0] = BINGET;
1571             pdata[1] = (unsigned char)(*value & 0xff);
1572             len = 2;
1573         }
1574         else if ((size_t)*value <= 0xffffffffUL) {
1575             pdata[0] = LONG_BINGET;
1576             pdata[1] = (unsigned char)(*value & 0xff);
1577             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1578             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1579             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1580             len = 5;
1581         }
1582         else { /* unlikely */
1583             PickleState *st = _Pickle_GetGlobalState();
1584             PyErr_SetString(st->PicklingError,
1585                             "memo id too large for LONG_BINGET");
1586             return -1;
1587         }
1588     }
1589 
1590     if (_Pickler_Write(self, pdata, len) < 0)
1591         return -1;
1592 
1593     return 0;
1594 }
1595 
1596 /* Store an object in the memo, assign it a new unique ID based on the number
1597    of objects currently stored in the memo and generate a PUT opcode. */
1598 static int
memo_put(PicklerObject * self,PyObject * obj)1599 memo_put(PicklerObject *self, PyObject *obj)
1600 {
1601     char pdata[30];
1602     Py_ssize_t len;
1603     Py_ssize_t idx;
1604 
1605     const char memoize_op = MEMOIZE;
1606 
1607     if (self->fast)
1608         return 0;
1609 
1610     idx = PyMemoTable_Size(self->memo);
1611     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1612         return -1;
1613 
1614     if (self->proto >= 4) {
1615         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1616             return -1;
1617         return 0;
1618     }
1619     else if (!self->bin) {
1620         pdata[0] = PUT;
1621         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1622                       "%" PY_FORMAT_SIZE_T "d\n", idx);
1623         len = strlen(pdata);
1624     }
1625     else {
1626         if (idx < 256) {
1627             pdata[0] = BINPUT;
1628             pdata[1] = (unsigned char)idx;
1629             len = 2;
1630         }
1631         else if ((size_t)idx <= 0xffffffffUL) {
1632             pdata[0] = LONG_BINPUT;
1633             pdata[1] = (unsigned char)(idx & 0xff);
1634             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1635             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1636             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1637             len = 5;
1638         }
1639         else { /* unlikely */
1640             PickleState *st = _Pickle_GetGlobalState();
1641             PyErr_SetString(st->PicklingError,
1642                             "memo id too large for LONG_BINPUT");
1643             return -1;
1644         }
1645     }
1646     if (_Pickler_Write(self, pdata, len) < 0)
1647         return -1;
1648 
1649     return 0;
1650 }
1651 
1652 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1653 get_dotted_path(PyObject *obj, PyObject *name)
1654 {
1655     _Py_static_string(PyId_dot, ".");
1656     PyObject *dotted_path;
1657     Py_ssize_t i, n;
1658 
1659     dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1660     if (dotted_path == NULL)
1661         return NULL;
1662     n = PyList_GET_SIZE(dotted_path);
1663     assert(n >= 1);
1664     for (i = 0; i < n; i++) {
1665         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1666         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1667             if (obj == NULL)
1668                 PyErr_Format(PyExc_AttributeError,
1669                              "Can't pickle local object %R", name);
1670             else
1671                 PyErr_Format(PyExc_AttributeError,
1672                              "Can't pickle local attribute %R on %R", name, obj);
1673             Py_DECREF(dotted_path);
1674             return NULL;
1675         }
1676     }
1677     return dotted_path;
1678 }
1679 
1680 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1681 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1682 {
1683     Py_ssize_t i, n;
1684     PyObject *parent = NULL;
1685 
1686     assert(PyList_CheckExact(names));
1687     Py_INCREF(obj);
1688     n = PyList_GET_SIZE(names);
1689     for (i = 0; i < n; i++) {
1690         PyObject *name = PyList_GET_ITEM(names, i);
1691         Py_XDECREF(parent);
1692         parent = obj;
1693         (void)_PyObject_LookupAttr(parent, name, &obj);
1694         if (obj == NULL) {
1695             Py_DECREF(parent);
1696             return NULL;
1697         }
1698     }
1699     if (pparent != NULL)
1700         *pparent = parent;
1701     else
1702         Py_XDECREF(parent);
1703     return obj;
1704 }
1705 
1706 
1707 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1708 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1709 {
1710     PyObject *dotted_path, *attr;
1711 
1712     if (allow_qualname) {
1713         dotted_path = get_dotted_path(obj, name);
1714         if (dotted_path == NULL)
1715             return NULL;
1716         attr = get_deep_attribute(obj, dotted_path, NULL);
1717         Py_DECREF(dotted_path);
1718     }
1719     else {
1720         (void)_PyObject_LookupAttr(obj, name, &attr);
1721     }
1722     if (attr == NULL && !PyErr_Occurred()) {
1723         PyErr_Format(PyExc_AttributeError,
1724                      "Can't get attribute %R on %R", name, obj);
1725     }
1726     return attr;
1727 }
1728 
1729 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1730 _checkmodule(PyObject *module_name, PyObject *module,
1731              PyObject *global, PyObject *dotted_path)
1732 {
1733     if (module == Py_None) {
1734         return -1;
1735     }
1736     if (PyUnicode_Check(module_name) &&
1737             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1738         return -1;
1739     }
1740 
1741     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1742     if (candidate == NULL) {
1743         return -1;
1744     }
1745     if (candidate != global) {
1746         Py_DECREF(candidate);
1747         return -1;
1748     }
1749     Py_DECREF(candidate);
1750     return 0;
1751 }
1752 
1753 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1754 whichmodule(PyObject *global, PyObject *dotted_path)
1755 {
1756     PyObject *module_name;
1757     PyObject *module = NULL;
1758     Py_ssize_t i;
1759     PyObject *modules;
1760     _Py_IDENTIFIER(__module__);
1761     _Py_IDENTIFIER(modules);
1762     _Py_IDENTIFIER(__main__);
1763 
1764     if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1765         return NULL;
1766     }
1767     if (module_name) {
1768         /* In some rare cases (e.g., bound methods of extension types),
1769            __module__ can be None. If it is so, then search sys.modules for
1770            the module of global. */
1771         if (module_name != Py_None)
1772             return module_name;
1773         Py_CLEAR(module_name);
1774     }
1775     assert(module_name == NULL);
1776 
1777     /* Fallback on walking sys.modules */
1778     modules = _PySys_GetObjectId(&PyId_modules);
1779     if (modules == NULL) {
1780         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1781         return NULL;
1782     }
1783     if (PyDict_CheckExact(modules)) {
1784         i = 0;
1785         while (PyDict_Next(modules, &i, &module_name, &module)) {
1786             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1787                 Py_INCREF(module_name);
1788                 return module_name;
1789             }
1790             if (PyErr_Occurred()) {
1791                 return NULL;
1792             }
1793         }
1794     }
1795     else {
1796         PyObject *iterator = PyObject_GetIter(modules);
1797         if (iterator == NULL) {
1798             return NULL;
1799         }
1800         while ((module_name = PyIter_Next(iterator))) {
1801             module = PyObject_GetItem(modules, module_name);
1802             if (module == NULL) {
1803                 Py_DECREF(module_name);
1804                 Py_DECREF(iterator);
1805                 return NULL;
1806             }
1807             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1808                 Py_DECREF(module);
1809                 Py_DECREF(iterator);
1810                 return module_name;
1811             }
1812             Py_DECREF(module);
1813             Py_DECREF(module_name);
1814             if (PyErr_Occurred()) {
1815                 Py_DECREF(iterator);
1816                 return NULL;
1817             }
1818         }
1819         Py_DECREF(iterator);
1820     }
1821 
1822     /* If no module is found, use __main__. */
1823     module_name = _PyUnicode_FromId(&PyId___main__);
1824     Py_XINCREF(module_name);
1825     return module_name;
1826 }
1827 
1828 /* fast_save_enter() and fast_save_leave() are guards against recursive
1829    objects when Pickler is used with the "fast mode" (i.e., with object
1830    memoization disabled). If the nesting of a list or dict object exceed
1831    FAST_NESTING_LIMIT, these guards will start keeping an internal
1832    reference to the seen list or dict objects and check whether these objects
1833    are recursive. These are not strictly necessary, since save() has a
1834    hard-coded recursion limit, but they give a nicer error message than the
1835    typical RuntimeError. */
1836 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1837 fast_save_enter(PicklerObject *self, PyObject *obj)
1838 {
1839     /* if fast_nesting < 0, we're doing an error exit. */
1840     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1841         PyObject *key = NULL;
1842         if (self->fast_memo == NULL) {
1843             self->fast_memo = PyDict_New();
1844             if (self->fast_memo == NULL) {
1845                 self->fast_nesting = -1;
1846                 return 0;
1847             }
1848         }
1849         key = PyLong_FromVoidPtr(obj);
1850         if (key == NULL) {
1851             self->fast_nesting = -1;
1852             return 0;
1853         }
1854         if (PyDict_GetItemWithError(self->fast_memo, key)) {
1855             Py_DECREF(key);
1856             PyErr_Format(PyExc_ValueError,
1857                          "fast mode: can't pickle cyclic objects "
1858                          "including object type %.200s at %p",
1859                          obj->ob_type->tp_name, obj);
1860             self->fast_nesting = -1;
1861             return 0;
1862         }
1863         if (PyErr_Occurred()) {
1864             Py_DECREF(key);
1865             self->fast_nesting = -1;
1866             return 0;
1867         }
1868         if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1869             Py_DECREF(key);
1870             self->fast_nesting = -1;
1871             return 0;
1872         }
1873         Py_DECREF(key);
1874     }
1875     return 1;
1876 }
1877 
1878 static int
fast_save_leave(PicklerObject * self,PyObject * obj)1879 fast_save_leave(PicklerObject *self, PyObject *obj)
1880 {
1881     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1882         PyObject *key = PyLong_FromVoidPtr(obj);
1883         if (key == NULL)
1884             return 0;
1885         if (PyDict_DelItem(self->fast_memo, key) < 0) {
1886             Py_DECREF(key);
1887             return 0;
1888         }
1889         Py_DECREF(key);
1890     }
1891     return 1;
1892 }
1893 
1894 static int
save_none(PicklerObject * self,PyObject * obj)1895 save_none(PicklerObject *self, PyObject *obj)
1896 {
1897     const char none_op = NONE;
1898     if (_Pickler_Write(self, &none_op, 1) < 0)
1899         return -1;
1900 
1901     return 0;
1902 }
1903 
1904 static int
save_bool(PicklerObject * self,PyObject * obj)1905 save_bool(PicklerObject *self, PyObject *obj)
1906 {
1907     if (self->proto >= 2) {
1908         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
1909         if (_Pickler_Write(self, &bool_op, 1) < 0)
1910             return -1;
1911     }
1912     else {
1913         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
1914          * so that unpicklers written before bools were introduced unpickle them
1915          * as ints, but unpicklers after can recognize that bools were intended.
1916          * Note that protocol 2 added direct ways to pickle bools.
1917          */
1918         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
1919         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
1920             return -1;
1921     }
1922     return 0;
1923 }
1924 
1925 static int
save_long(PicklerObject * self,PyObject * obj)1926 save_long(PicklerObject *self, PyObject *obj)
1927 {
1928     PyObject *repr = NULL;
1929     Py_ssize_t size;
1930     long val;
1931     int overflow;
1932     int status = 0;
1933 
1934     val= PyLong_AsLongAndOverflow(obj, &overflow);
1935     if (!overflow && (sizeof(long) <= 4 ||
1936             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
1937     {
1938         /* result fits in a signed 4-byte integer.
1939 
1940            Note: we can't use -0x80000000L in the above condition because some
1941            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
1942            before applying the unary minus when sizeof(long) <= 4. The
1943            resulting value stays unsigned which is commonly not what we want,
1944            so MSVC happily warns us about it.  However, that result would have
1945            been fine because we guard for sizeof(long) <= 4 which turns the
1946            condition true in that particular case. */
1947         char pdata[32];
1948         Py_ssize_t len = 0;
1949 
1950         if (self->bin) {
1951             pdata[1] = (unsigned char)(val & 0xff);
1952             pdata[2] = (unsigned char)((val >> 8) & 0xff);
1953             pdata[3] = (unsigned char)((val >> 16) & 0xff);
1954             pdata[4] = (unsigned char)((val >> 24) & 0xff);
1955 
1956             if ((pdata[4] != 0) || (pdata[3] != 0)) {
1957                 pdata[0] = BININT;
1958                 len = 5;
1959             }
1960             else if (pdata[2] != 0) {
1961                 pdata[0] = BININT2;
1962                 len = 3;
1963             }
1964             else {
1965                 pdata[0] = BININT1;
1966                 len = 2;
1967             }
1968         }
1969         else {
1970             sprintf(pdata, "%c%ld\n", INT,  val);
1971             len = strlen(pdata);
1972         }
1973         if (_Pickler_Write(self, pdata, len) < 0)
1974             return -1;
1975 
1976         return 0;
1977     }
1978     assert(!PyErr_Occurred());
1979 
1980     if (self->proto >= 2) {
1981         /* Linear-time pickling. */
1982         size_t nbits;
1983         size_t nbytes;
1984         unsigned char *pdata;
1985         char header[5];
1986         int i;
1987         int sign = _PyLong_Sign(obj);
1988 
1989         if (sign == 0) {
1990             header[0] = LONG1;
1991             header[1] = 0;      /* It's 0 -- an empty bytestring. */
1992             if (_Pickler_Write(self, header, 2) < 0)
1993                 goto error;
1994             return 0;
1995         }
1996         nbits = _PyLong_NumBits(obj);
1997         if (nbits == (size_t)-1 && PyErr_Occurred())
1998             goto error;
1999         /* How many bytes do we need?  There are nbits >> 3 full
2000          * bytes of data, and nbits & 7 leftover bits.  If there
2001          * are any leftover bits, then we clearly need another
2002          * byte.  What's not so obvious is that we *probably*
2003          * need another byte even if there aren't any leftovers:
2004          * the most-significant bit of the most-significant byte
2005          * acts like a sign bit, and it's usually got a sense
2006          * opposite of the one we need.  The exception is ints
2007          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2008          * its own 256's-complement, so has the right sign bit
2009          * even without the extra byte.  That's a pain to check
2010          * for in advance, though, so we always grab an extra
2011          * byte at the start, and cut it back later if possible.
2012          */
2013         nbytes = (nbits >> 3) + 1;
2014         if (nbytes > 0x7fffffffL) {
2015             PyErr_SetString(PyExc_OverflowError,
2016                             "int too large to pickle");
2017             goto error;
2018         }
2019         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2020         if (repr == NULL)
2021             goto error;
2022         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2023         i = _PyLong_AsByteArray((PyLongObject *)obj,
2024                                 pdata, nbytes,
2025                                 1 /* little endian */ , 1 /* signed */ );
2026         if (i < 0)
2027             goto error;
2028         /* If the int is negative, this may be a byte more than
2029          * needed.  This is so iff the MSB is all redundant sign
2030          * bits.
2031          */
2032         if (sign < 0 &&
2033             nbytes > 1 &&
2034             pdata[nbytes - 1] == 0xff &&
2035             (pdata[nbytes - 2] & 0x80) != 0) {
2036             nbytes--;
2037         }
2038 
2039         if (nbytes < 256) {
2040             header[0] = LONG1;
2041             header[1] = (unsigned char)nbytes;
2042             size = 2;
2043         }
2044         else {
2045             header[0] = LONG4;
2046             size = (Py_ssize_t) nbytes;
2047             for (i = 1; i < 5; i++) {
2048                 header[i] = (unsigned char)(size & 0xff);
2049                 size >>= 8;
2050             }
2051             size = 5;
2052         }
2053         if (_Pickler_Write(self, header, size) < 0 ||
2054             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2055             goto error;
2056     }
2057     else {
2058         const char long_op = LONG;
2059         const char *string;
2060 
2061         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2062            the number of digits), in both directions.  We add a trailing 'L'
2063            to the repr, for compatibility with Python 2.x. */
2064 
2065         repr = PyObject_Repr(obj);
2066         if (repr == NULL)
2067             goto error;
2068 
2069         string = PyUnicode_AsUTF8AndSize(repr, &size);
2070         if (string == NULL)
2071             goto error;
2072 
2073         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2074             _Pickler_Write(self, string, size) < 0 ||
2075             _Pickler_Write(self, "L\n", 2) < 0)
2076             goto error;
2077     }
2078 
2079     if (0) {
2080   error:
2081       status = -1;
2082     }
2083     Py_XDECREF(repr);
2084 
2085     return status;
2086 }
2087 
2088 static int
save_float(PicklerObject * self,PyObject * obj)2089 save_float(PicklerObject *self, PyObject *obj)
2090 {
2091     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2092 
2093     if (self->bin) {
2094         char pdata[9];
2095         pdata[0] = BINFLOAT;
2096         if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2097             return -1;
2098         if (_Pickler_Write(self, pdata, 9) < 0)
2099             return -1;
2100    }
2101     else {
2102         int result = -1;
2103         char *buf = NULL;
2104         char op = FLOAT;
2105 
2106         if (_Pickler_Write(self, &op, 1) < 0)
2107             goto done;
2108 
2109         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2110         if (!buf) {
2111             PyErr_NoMemory();
2112             goto done;
2113         }
2114 
2115         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2116             goto done;
2117 
2118         if (_Pickler_Write(self, "\n", 1) < 0)
2119             goto done;
2120 
2121         result = 0;
2122 done:
2123         PyMem_Free(buf);
2124         return result;
2125     }
2126 
2127     return 0;
2128 }
2129 
2130 /* Perform direct write of the header and payload of the binary object.
2131 
2132    The large contiguous data is written directly into the underlying file
2133    object, bypassing the output_buffer of the Pickler.  We intentionally
2134    do not insert a protocol 4 frame opcode to make it possible to optimize
2135    file.read calls in the loader.
2136  */
2137 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2138 _Pickler_write_bytes(PicklerObject *self,
2139                      const char *header, Py_ssize_t header_size,
2140                      const char *data, Py_ssize_t data_size,
2141                      PyObject *payload)
2142 {
2143     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2144     int framing = self->framing;
2145 
2146     if (bypass_buffer) {
2147         assert(self->output_buffer != NULL);
2148         /* Commit the previous frame. */
2149         if (_Pickler_CommitFrame(self)) {
2150             return -1;
2151         }
2152         /* Disable framing temporarily */
2153         self->framing = 0;
2154     }
2155 
2156     if (_Pickler_Write(self, header, header_size) < 0) {
2157         return -1;
2158     }
2159 
2160     if (bypass_buffer && self->write != NULL) {
2161         /* Bypass the in-memory buffer to directly stream large data
2162            into the underlying file object. */
2163         PyObject *result, *mem = NULL;
2164         /* Dump the output buffer to the file. */
2165         if (_Pickler_FlushToFile(self) < 0) {
2166             return -1;
2167         }
2168 
2169         /* Stream write the payload into the file without going through the
2170            output buffer. */
2171         if (payload == NULL) {
2172             /* TODO: It would be better to use a memoryview with a linked
2173                original string if this is possible. */
2174             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2175             if (payload == NULL) {
2176                 return -1;
2177             }
2178         }
2179         result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2180         Py_XDECREF(mem);
2181         if (result == NULL) {
2182             return -1;
2183         }
2184         Py_DECREF(result);
2185 
2186         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2187         if (_Pickler_ClearBuffer(self) < 0) {
2188             return -1;
2189         }
2190     }
2191     else {
2192         if (_Pickler_Write(self, data, data_size) < 0) {
2193             return -1;
2194         }
2195     }
2196 
2197     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2198     self->framing = framing;
2199 
2200     return 0;
2201 }
2202 
2203 static int
save_bytes(PicklerObject * self,PyObject * obj)2204 save_bytes(PicklerObject *self, PyObject *obj)
2205 {
2206     if (self->proto < 3) {
2207         /* Older pickle protocols do not have an opcode for pickling bytes
2208            objects. Therefore, we need to fake the copy protocol (i.e.,
2209            the __reduce__ method) to permit bytes object unpickling.
2210 
2211            Here we use a hack to be compatible with Python 2. Since in Python
2212            2 'bytes' is just an alias for 'str' (which has different
2213            parameters than the actual bytes object), we use codecs.encode
2214            to create the appropriate 'str' object when unpickled using
2215            Python 2 *and* the appropriate 'bytes' object when unpickled
2216            using Python 3. Again this is a hack and we don't need to do this
2217            with newer protocols. */
2218         PyObject *reduce_value = NULL;
2219         int status;
2220 
2221         if (PyBytes_GET_SIZE(obj) == 0) {
2222             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2223         }
2224         else {
2225             PickleState *st = _Pickle_GetGlobalState();
2226             PyObject *unicode_str =
2227                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2228                                        PyBytes_GET_SIZE(obj),
2229                                        "strict");
2230             _Py_IDENTIFIER(latin1);
2231 
2232             if (unicode_str == NULL)
2233                 return -1;
2234             reduce_value = Py_BuildValue("(O(OO))",
2235                                          st->codecs_encode, unicode_str,
2236                                          _PyUnicode_FromId(&PyId_latin1));
2237             Py_DECREF(unicode_str);
2238         }
2239 
2240         if (reduce_value == NULL)
2241             return -1;
2242 
2243         /* save_reduce() will memoize the object automatically. */
2244         status = save_reduce(self, reduce_value, obj);
2245         Py_DECREF(reduce_value);
2246         return status;
2247     }
2248     else {
2249         Py_ssize_t size;
2250         char header[9];
2251         Py_ssize_t len;
2252 
2253         size = PyBytes_GET_SIZE(obj);
2254         if (size < 0)
2255             return -1;
2256 
2257         if (size <= 0xff) {
2258             header[0] = SHORT_BINBYTES;
2259             header[1] = (unsigned char)size;
2260             len = 2;
2261         }
2262         else if ((size_t)size <= 0xffffffffUL) {
2263             header[0] = BINBYTES;
2264             header[1] = (unsigned char)(size & 0xff);
2265             header[2] = (unsigned char)((size >> 8) & 0xff);
2266             header[3] = (unsigned char)((size >> 16) & 0xff);
2267             header[4] = (unsigned char)((size >> 24) & 0xff);
2268             len = 5;
2269         }
2270         else if (self->proto >= 4) {
2271             header[0] = BINBYTES8;
2272             _write_size64(header + 1, size);
2273             len = 9;
2274         }
2275         else {
2276             PyErr_SetString(PyExc_OverflowError,
2277                             "cannot serialize a bytes object larger than 4 GiB");
2278             return -1;          /* string too large */
2279         }
2280 
2281         if (_Pickler_write_bytes(self, header, len,
2282                                  PyBytes_AS_STRING(obj), size, obj) < 0)
2283         {
2284             return -1;
2285         }
2286 
2287         if (memo_put(self, obj) < 0)
2288             return -1;
2289 
2290         return 0;
2291     }
2292 }
2293 
2294 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2295    backslash and newline characters to \uXXXX escapes. */
2296 static PyObject *
raw_unicode_escape(PyObject * obj)2297 raw_unicode_escape(PyObject *obj)
2298 {
2299     char *p;
2300     Py_ssize_t i, size;
2301     void *data;
2302     unsigned int kind;
2303     _PyBytesWriter writer;
2304 
2305     if (PyUnicode_READY(obj))
2306         return NULL;
2307 
2308     _PyBytesWriter_Init(&writer);
2309 
2310     size = PyUnicode_GET_LENGTH(obj);
2311     data = PyUnicode_DATA(obj);
2312     kind = PyUnicode_KIND(obj);
2313 
2314     p = _PyBytesWriter_Alloc(&writer, size);
2315     if (p == NULL)
2316         goto error;
2317     writer.overallocate = 1;
2318 
2319     for (i=0; i < size; i++) {
2320         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2321         /* Map 32-bit characters to '\Uxxxxxxxx' */
2322         if (ch >= 0x10000) {
2323             /* -1: subtract 1 preallocated byte */
2324             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2325             if (p == NULL)
2326                 goto error;
2327 
2328             *p++ = '\\';
2329             *p++ = 'U';
2330             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2331             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2332             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2333             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2334             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2335             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2336             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2337             *p++ = Py_hexdigits[ch & 15];
2338         }
2339         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2340         else if (ch >= 256 ||
2341                  ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2342                  ch == 0x1a)
2343         {
2344             /* -1: subtract 1 preallocated byte */
2345             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2346             if (p == NULL)
2347                 goto error;
2348 
2349             *p++ = '\\';
2350             *p++ = 'u';
2351             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2352             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2353             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2354             *p++ = Py_hexdigits[ch & 15];
2355         }
2356         /* Copy everything else as-is */
2357         else
2358             *p++ = (char) ch;
2359     }
2360 
2361     return _PyBytesWriter_Finish(&writer, p);
2362 
2363 error:
2364     _PyBytesWriter_Dealloc(&writer);
2365     return NULL;
2366 }
2367 
2368 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2369 write_unicode_binary(PicklerObject *self, PyObject *obj)
2370 {
2371     char header[9];
2372     Py_ssize_t len;
2373     PyObject *encoded = NULL;
2374     Py_ssize_t size;
2375     const char *data;
2376 
2377     if (PyUnicode_READY(obj))
2378         return -1;
2379 
2380     data = PyUnicode_AsUTF8AndSize(obj, &size);
2381     if (data == NULL) {
2382         /* Issue #8383: for strings with lone surrogates, fallback on the
2383            "surrogatepass" error handler. */
2384         PyErr_Clear();
2385         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2386         if (encoded == NULL)
2387             return -1;
2388 
2389         data = PyBytes_AS_STRING(encoded);
2390         size = PyBytes_GET_SIZE(encoded);
2391     }
2392 
2393     assert(size >= 0);
2394     if (size <= 0xff && self->proto >= 4) {
2395         header[0] = SHORT_BINUNICODE;
2396         header[1] = (unsigned char)(size & 0xff);
2397         len = 2;
2398     }
2399     else if ((size_t)size <= 0xffffffffUL) {
2400         header[0] = BINUNICODE;
2401         header[1] = (unsigned char)(size & 0xff);
2402         header[2] = (unsigned char)((size >> 8) & 0xff);
2403         header[3] = (unsigned char)((size >> 16) & 0xff);
2404         header[4] = (unsigned char)((size >> 24) & 0xff);
2405         len = 5;
2406     }
2407     else if (self->proto >= 4) {
2408         header[0] = BINUNICODE8;
2409         _write_size64(header + 1, size);
2410         len = 9;
2411     }
2412     else {
2413         PyErr_SetString(PyExc_OverflowError,
2414                         "cannot serialize a string larger than 4GiB");
2415         Py_XDECREF(encoded);
2416         return -1;
2417     }
2418 
2419     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2420         Py_XDECREF(encoded);
2421         return -1;
2422     }
2423     Py_XDECREF(encoded);
2424     return 0;
2425 }
2426 
2427 static int
save_unicode(PicklerObject * self,PyObject * obj)2428 save_unicode(PicklerObject *self, PyObject *obj)
2429 {
2430     if (self->bin) {
2431         if (write_unicode_binary(self, obj) < 0)
2432             return -1;
2433     }
2434     else {
2435         PyObject *encoded;
2436         Py_ssize_t size;
2437         const char unicode_op = UNICODE;
2438 
2439         encoded = raw_unicode_escape(obj);
2440         if (encoded == NULL)
2441             return -1;
2442 
2443         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2444             Py_DECREF(encoded);
2445             return -1;
2446         }
2447 
2448         size = PyBytes_GET_SIZE(encoded);
2449         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2450             Py_DECREF(encoded);
2451             return -1;
2452         }
2453         Py_DECREF(encoded);
2454 
2455         if (_Pickler_Write(self, "\n", 1) < 0)
2456             return -1;
2457     }
2458     if (memo_put(self, obj) < 0)
2459         return -1;
2460 
2461     return 0;
2462 }
2463 
2464 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2465 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2466 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2467 {
2468     Py_ssize_t i;
2469 
2470     assert(PyTuple_Size(t) == len);
2471 
2472     for (i = 0; i < len; i++) {
2473         PyObject *element = PyTuple_GET_ITEM(t, i);
2474 
2475         if (element == NULL)
2476             return -1;
2477         if (save(self, element, 0) < 0)
2478             return -1;
2479     }
2480 
2481     return 0;
2482 }
2483 
2484 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2485  * used across protocols to minimize the space needed to pickle them.
2486  * Tuples are also the only builtin immutable type that can be recursive
2487  * (a tuple can be reached from itself), and that requires some subtle
2488  * magic so that it works in all cases.  IOW, this is a long routine.
2489  */
2490 static int
save_tuple(PicklerObject * self,PyObject * obj)2491 save_tuple(PicklerObject *self, PyObject *obj)
2492 {
2493     Py_ssize_t len, i;
2494 
2495     const char mark_op = MARK;
2496     const char tuple_op = TUPLE;
2497     const char pop_op = POP;
2498     const char pop_mark_op = POP_MARK;
2499     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2500 
2501     if ((len = PyTuple_Size(obj)) < 0)
2502         return -1;
2503 
2504     if (len == 0) {
2505         char pdata[2];
2506 
2507         if (self->proto) {
2508             pdata[0] = EMPTY_TUPLE;
2509             len = 1;
2510         }
2511         else {
2512             pdata[0] = MARK;
2513             pdata[1] = TUPLE;
2514             len = 2;
2515         }
2516         if (_Pickler_Write(self, pdata, len) < 0)
2517             return -1;
2518         return 0;
2519     }
2520 
2521     /* The tuple isn't in the memo now.  If it shows up there after
2522      * saving the tuple elements, the tuple must be recursive, in
2523      * which case we'll pop everything we put on the stack, and fetch
2524      * its value from the memo.
2525      */
2526     if (len <= 3 && self->proto >= 2) {
2527         /* Use TUPLE{1,2,3} opcodes. */
2528         if (store_tuple_elements(self, obj, len) < 0)
2529             return -1;
2530 
2531         if (PyMemoTable_Get(self->memo, obj)) {
2532             /* pop the len elements */
2533             for (i = 0; i < len; i++)
2534                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2535                     return -1;
2536             /* fetch from memo */
2537             if (memo_get(self, obj) < 0)
2538                 return -1;
2539 
2540             return 0;
2541         }
2542         else { /* Not recursive. */
2543             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2544                 return -1;
2545         }
2546         goto memoize;
2547     }
2548 
2549     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2550      * Generate MARK e1 e2 ... TUPLE
2551      */
2552     if (_Pickler_Write(self, &mark_op, 1) < 0)
2553         return -1;
2554 
2555     if (store_tuple_elements(self, obj, len) < 0)
2556         return -1;
2557 
2558     if (PyMemoTable_Get(self->memo, obj)) {
2559         /* pop the stack stuff we pushed */
2560         if (self->bin) {
2561             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2562                 return -1;
2563         }
2564         else {
2565             /* Note that we pop one more than len, to remove
2566              * the MARK too.
2567              */
2568             for (i = 0; i <= len; i++)
2569                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2570                     return -1;
2571         }
2572         /* fetch from memo */
2573         if (memo_get(self, obj) < 0)
2574             return -1;
2575 
2576         return 0;
2577     }
2578     else { /* Not recursive. */
2579         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2580             return -1;
2581     }
2582 
2583   memoize:
2584     if (memo_put(self, obj) < 0)
2585         return -1;
2586 
2587     return 0;
2588 }
2589 
2590 /* iter is an iterator giving items, and we batch up chunks of
2591  *     MARK item item ... item APPENDS
2592  * opcode sequences.  Calling code should have arranged to first create an
2593  * empty list, or list-like object, for the APPENDS to operate on.
2594  * Returns 0 on success, <0 on error.
2595  */
2596 static int
batch_list(PicklerObject * self,PyObject * iter)2597 batch_list(PicklerObject *self, PyObject *iter)
2598 {
2599     PyObject *obj = NULL;
2600     PyObject *firstitem = NULL;
2601     int i, n;
2602 
2603     const char mark_op = MARK;
2604     const char append_op = APPEND;
2605     const char appends_op = APPENDS;
2606 
2607     assert(iter != NULL);
2608 
2609     /* XXX: I think this function could be made faster by avoiding the
2610        iterator interface and fetching objects directly from list using
2611        PyList_GET_ITEM.
2612     */
2613 
2614     if (self->proto == 0) {
2615         /* APPENDS isn't available; do one at a time. */
2616         for (;;) {
2617             obj = PyIter_Next(iter);
2618             if (obj == NULL) {
2619                 if (PyErr_Occurred())
2620                     return -1;
2621                 break;
2622             }
2623             i = save(self, obj, 0);
2624             Py_DECREF(obj);
2625             if (i < 0)
2626                 return -1;
2627             if (_Pickler_Write(self, &append_op, 1) < 0)
2628                 return -1;
2629         }
2630         return 0;
2631     }
2632 
2633     /* proto > 0:  write in batches of BATCHSIZE. */
2634     do {
2635         /* Get first item */
2636         firstitem = PyIter_Next(iter);
2637         if (firstitem == NULL) {
2638             if (PyErr_Occurred())
2639                 goto error;
2640 
2641             /* nothing more to add */
2642             break;
2643         }
2644 
2645         /* Try to get a second item */
2646         obj = PyIter_Next(iter);
2647         if (obj == NULL) {
2648             if (PyErr_Occurred())
2649                 goto error;
2650 
2651             /* Only one item to write */
2652             if (save(self, firstitem, 0) < 0)
2653                 goto error;
2654             if (_Pickler_Write(self, &append_op, 1) < 0)
2655                 goto error;
2656             Py_CLEAR(firstitem);
2657             break;
2658         }
2659 
2660         /* More than one item to write */
2661 
2662         /* Pump out MARK, items, APPENDS. */
2663         if (_Pickler_Write(self, &mark_op, 1) < 0)
2664             goto error;
2665 
2666         if (save(self, firstitem, 0) < 0)
2667             goto error;
2668         Py_CLEAR(firstitem);
2669         n = 1;
2670 
2671         /* Fetch and save up to BATCHSIZE items */
2672         while (obj) {
2673             if (save(self, obj, 0) < 0)
2674                 goto error;
2675             Py_CLEAR(obj);
2676             n += 1;
2677 
2678             if (n == BATCHSIZE)
2679                 break;
2680 
2681             obj = PyIter_Next(iter);
2682             if (obj == NULL) {
2683                 if (PyErr_Occurred())
2684                     goto error;
2685                 break;
2686             }
2687         }
2688 
2689         if (_Pickler_Write(self, &appends_op, 1) < 0)
2690             goto error;
2691 
2692     } while (n == BATCHSIZE);
2693     return 0;
2694 
2695   error:
2696     Py_XDECREF(firstitem);
2697     Py_XDECREF(obj);
2698     return -1;
2699 }
2700 
2701 /* This is a variant of batch_list() above, specialized for lists (with no
2702  * support for list subclasses). Like batch_list(), we batch up chunks of
2703  *     MARK item item ... item APPENDS
2704  * opcode sequences.  Calling code should have arranged to first create an
2705  * empty list, or list-like object, for the APPENDS to operate on.
2706  * Returns 0 on success, -1 on error.
2707  *
2708  * This version is considerably faster than batch_list(), if less general.
2709  *
2710  * Note that this only works for protocols > 0.
2711  */
2712 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2713 batch_list_exact(PicklerObject *self, PyObject *obj)
2714 {
2715     PyObject *item = NULL;
2716     Py_ssize_t this_batch, total;
2717 
2718     const char append_op = APPEND;
2719     const char appends_op = APPENDS;
2720     const char mark_op = MARK;
2721 
2722     assert(obj != NULL);
2723     assert(self->proto > 0);
2724     assert(PyList_CheckExact(obj));
2725 
2726     if (PyList_GET_SIZE(obj) == 1) {
2727         item = PyList_GET_ITEM(obj, 0);
2728         if (save(self, item, 0) < 0)
2729             return -1;
2730         if (_Pickler_Write(self, &append_op, 1) < 0)
2731             return -1;
2732         return 0;
2733     }
2734 
2735     /* Write in batches of BATCHSIZE. */
2736     total = 0;
2737     do {
2738         this_batch = 0;
2739         if (_Pickler_Write(self, &mark_op, 1) < 0)
2740             return -1;
2741         while (total < PyList_GET_SIZE(obj)) {
2742             item = PyList_GET_ITEM(obj, total);
2743             if (save(self, item, 0) < 0)
2744                 return -1;
2745             total++;
2746             if (++this_batch == BATCHSIZE)
2747                 break;
2748         }
2749         if (_Pickler_Write(self, &appends_op, 1) < 0)
2750             return -1;
2751 
2752     } while (total < PyList_GET_SIZE(obj));
2753 
2754     return 0;
2755 }
2756 
2757 static int
save_list(PicklerObject * self,PyObject * obj)2758 save_list(PicklerObject *self, PyObject *obj)
2759 {
2760     char header[3];
2761     Py_ssize_t len;
2762     int status = 0;
2763 
2764     if (self->fast && !fast_save_enter(self, obj))
2765         goto error;
2766 
2767     /* Create an empty list. */
2768     if (self->bin) {
2769         header[0] = EMPTY_LIST;
2770         len = 1;
2771     }
2772     else {
2773         header[0] = MARK;
2774         header[1] = LIST;
2775         len = 2;
2776     }
2777 
2778     if (_Pickler_Write(self, header, len) < 0)
2779         goto error;
2780 
2781     /* Get list length, and bow out early if empty. */
2782     if ((len = PyList_Size(obj)) < 0)
2783         goto error;
2784 
2785     if (memo_put(self, obj) < 0)
2786         goto error;
2787 
2788     if (len != 0) {
2789         /* Materialize the list elements. */
2790         if (PyList_CheckExact(obj) && self->proto > 0) {
2791             if (Py_EnterRecursiveCall(" while pickling an object"))
2792                 goto error;
2793             status = batch_list_exact(self, obj);
2794             Py_LeaveRecursiveCall();
2795         } else {
2796             PyObject *iter = PyObject_GetIter(obj);
2797             if (iter == NULL)
2798                 goto error;
2799 
2800             if (Py_EnterRecursiveCall(" while pickling an object")) {
2801                 Py_DECREF(iter);
2802                 goto error;
2803             }
2804             status = batch_list(self, iter);
2805             Py_LeaveRecursiveCall();
2806             Py_DECREF(iter);
2807         }
2808     }
2809     if (0) {
2810   error:
2811         status = -1;
2812     }
2813 
2814     if (self->fast && !fast_save_leave(self, obj))
2815         status = -1;
2816 
2817     return status;
2818 }
2819 
2820 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2821  *     MARK key value ... key value SETITEMS
2822  * opcode sequences.  Calling code should have arranged to first create an
2823  * empty dict, or dict-like object, for the SETITEMS to operate on.
2824  * Returns 0 on success, <0 on error.
2825  *
2826  * This is very much like batch_list().  The difference between saving
2827  * elements directly, and picking apart two-tuples, is so long-winded at
2828  * the C level, though, that attempts to combine these routines were too
2829  * ugly to bear.
2830  */
2831 static int
batch_dict(PicklerObject * self,PyObject * iter)2832 batch_dict(PicklerObject *self, PyObject *iter)
2833 {
2834     PyObject *obj = NULL;
2835     PyObject *firstitem = NULL;
2836     int i, n;
2837 
2838     const char mark_op = MARK;
2839     const char setitem_op = SETITEM;
2840     const char setitems_op = SETITEMS;
2841 
2842     assert(iter != NULL);
2843 
2844     if (self->proto == 0) {
2845         /* SETITEMS isn't available; do one at a time. */
2846         for (;;) {
2847             obj = PyIter_Next(iter);
2848             if (obj == NULL) {
2849                 if (PyErr_Occurred())
2850                     return -1;
2851                 break;
2852             }
2853             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2854                 PyErr_SetString(PyExc_TypeError, "dict items "
2855                                 "iterator must return 2-tuples");
2856                 return -1;
2857             }
2858             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2859             if (i >= 0)
2860                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2861             Py_DECREF(obj);
2862             if (i < 0)
2863                 return -1;
2864             if (_Pickler_Write(self, &setitem_op, 1) < 0)
2865                 return -1;
2866         }
2867         return 0;
2868     }
2869 
2870     /* proto > 0:  write in batches of BATCHSIZE. */
2871     do {
2872         /* Get first item */
2873         firstitem = PyIter_Next(iter);
2874         if (firstitem == NULL) {
2875             if (PyErr_Occurred())
2876                 goto error;
2877 
2878             /* nothing more to add */
2879             break;
2880         }
2881         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2882             PyErr_SetString(PyExc_TypeError, "dict items "
2883                                 "iterator must return 2-tuples");
2884             goto error;
2885         }
2886 
2887         /* Try to get a second item */
2888         obj = PyIter_Next(iter);
2889         if (obj == NULL) {
2890             if (PyErr_Occurred())
2891                 goto error;
2892 
2893             /* Only one item to write */
2894             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2895                 goto error;
2896             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2897                 goto error;
2898             if (_Pickler_Write(self, &setitem_op, 1) < 0)
2899                 goto error;
2900             Py_CLEAR(firstitem);
2901             break;
2902         }
2903 
2904         /* More than one item to write */
2905 
2906         /* Pump out MARK, items, SETITEMS. */
2907         if (_Pickler_Write(self, &mark_op, 1) < 0)
2908             goto error;
2909 
2910         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2911             goto error;
2912         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2913             goto error;
2914         Py_CLEAR(firstitem);
2915         n = 1;
2916 
2917         /* Fetch and save up to BATCHSIZE items */
2918         while (obj) {
2919             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2920                 PyErr_SetString(PyExc_TypeError, "dict items "
2921                     "iterator must return 2-tuples");
2922                 goto error;
2923             }
2924             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2925                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2926                 goto error;
2927             Py_CLEAR(obj);
2928             n += 1;
2929 
2930             if (n == BATCHSIZE)
2931                 break;
2932 
2933             obj = PyIter_Next(iter);
2934             if (obj == NULL) {
2935                 if (PyErr_Occurred())
2936                     goto error;
2937                 break;
2938             }
2939         }
2940 
2941         if (_Pickler_Write(self, &setitems_op, 1) < 0)
2942             goto error;
2943 
2944     } while (n == BATCHSIZE);
2945     return 0;
2946 
2947   error:
2948     Py_XDECREF(firstitem);
2949     Py_XDECREF(obj);
2950     return -1;
2951 }
2952 
2953 /* This is a variant of batch_dict() above that specializes for dicts, with no
2954  * support for dict subclasses. Like batch_dict(), we batch up chunks of
2955  *     MARK key value ... key value SETITEMS
2956  * opcode sequences.  Calling code should have arranged to first create an
2957  * empty dict, or dict-like object, for the SETITEMS to operate on.
2958  * Returns 0 on success, -1 on error.
2959  *
2960  * Note that this currently doesn't work for protocol 0.
2961  */
2962 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)2963 batch_dict_exact(PicklerObject *self, PyObject *obj)
2964 {
2965     PyObject *key = NULL, *value = NULL;
2966     int i;
2967     Py_ssize_t dict_size, ppos = 0;
2968 
2969     const char mark_op = MARK;
2970     const char setitem_op = SETITEM;
2971     const char setitems_op = SETITEMS;
2972 
2973     assert(obj != NULL && PyDict_CheckExact(obj));
2974     assert(self->proto > 0);
2975 
2976     dict_size = PyDict_GET_SIZE(obj);
2977 
2978     /* Special-case len(d) == 1 to save space. */
2979     if (dict_size == 1) {
2980         PyDict_Next(obj, &ppos, &key, &value);
2981         if (save(self, key, 0) < 0)
2982             return -1;
2983         if (save(self, value, 0) < 0)
2984             return -1;
2985         if (_Pickler_Write(self, &setitem_op, 1) < 0)
2986             return -1;
2987         return 0;
2988     }
2989 
2990     /* Write in batches of BATCHSIZE. */
2991     do {
2992         i = 0;
2993         if (_Pickler_Write(self, &mark_op, 1) < 0)
2994             return -1;
2995         while (PyDict_Next(obj, &ppos, &key, &value)) {
2996             if (save(self, key, 0) < 0)
2997                 return -1;
2998             if (save(self, value, 0) < 0)
2999                 return -1;
3000             if (++i == BATCHSIZE)
3001                 break;
3002         }
3003         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3004             return -1;
3005         if (PyDict_GET_SIZE(obj) != dict_size) {
3006             PyErr_Format(
3007                 PyExc_RuntimeError,
3008                 "dictionary changed size during iteration");
3009             return -1;
3010         }
3011 
3012     } while (i == BATCHSIZE);
3013     return 0;
3014 }
3015 
3016 static int
save_dict(PicklerObject * self,PyObject * obj)3017 save_dict(PicklerObject *self, PyObject *obj)
3018 {
3019     PyObject *items, *iter;
3020     char header[3];
3021     Py_ssize_t len;
3022     int status = 0;
3023     assert(PyDict_Check(obj));
3024 
3025     if (self->fast && !fast_save_enter(self, obj))
3026         goto error;
3027 
3028     /* Create an empty dict. */
3029     if (self->bin) {
3030         header[0] = EMPTY_DICT;
3031         len = 1;
3032     }
3033     else {
3034         header[0] = MARK;
3035         header[1] = DICT;
3036         len = 2;
3037     }
3038 
3039     if (_Pickler_Write(self, header, len) < 0)
3040         goto error;
3041 
3042     if (memo_put(self, obj) < 0)
3043         goto error;
3044 
3045     if (PyDict_GET_SIZE(obj)) {
3046         /* Save the dict items. */
3047         if (PyDict_CheckExact(obj) && self->proto > 0) {
3048             /* We can take certain shortcuts if we know this is a dict and
3049                not a dict subclass. */
3050             if (Py_EnterRecursiveCall(" while pickling an object"))
3051                 goto error;
3052             status = batch_dict_exact(self, obj);
3053             Py_LeaveRecursiveCall();
3054         } else {
3055             _Py_IDENTIFIER(items);
3056 
3057             items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3058             if (items == NULL)
3059                 goto error;
3060             iter = PyObject_GetIter(items);
3061             Py_DECREF(items);
3062             if (iter == NULL)
3063                 goto error;
3064             if (Py_EnterRecursiveCall(" while pickling an object")) {
3065                 Py_DECREF(iter);
3066                 goto error;
3067             }
3068             status = batch_dict(self, iter);
3069             Py_LeaveRecursiveCall();
3070             Py_DECREF(iter);
3071         }
3072     }
3073 
3074     if (0) {
3075   error:
3076         status = -1;
3077     }
3078 
3079     if (self->fast && !fast_save_leave(self, obj))
3080         status = -1;
3081 
3082     return status;
3083 }
3084 
3085 static int
save_set(PicklerObject * self,PyObject * obj)3086 save_set(PicklerObject *self, PyObject *obj)
3087 {
3088     PyObject *item;
3089     int i;
3090     Py_ssize_t set_size, ppos = 0;
3091     Py_hash_t hash;
3092 
3093     const char empty_set_op = EMPTY_SET;
3094     const char mark_op = MARK;
3095     const char additems_op = ADDITEMS;
3096 
3097     if (self->proto < 4) {
3098         PyObject *items;
3099         PyObject *reduce_value;
3100         int status;
3101 
3102         items = PySequence_List(obj);
3103         if (items == NULL) {
3104             return -1;
3105         }
3106         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3107         Py_DECREF(items);
3108         if (reduce_value == NULL) {
3109             return -1;
3110         }
3111         /* save_reduce() will memoize the object automatically. */
3112         status = save_reduce(self, reduce_value, obj);
3113         Py_DECREF(reduce_value);
3114         return status;
3115     }
3116 
3117     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3118         return -1;
3119 
3120     if (memo_put(self, obj) < 0)
3121         return -1;
3122 
3123     set_size = PySet_GET_SIZE(obj);
3124     if (set_size == 0)
3125         return 0;  /* nothing to do */
3126 
3127     /* Write in batches of BATCHSIZE. */
3128     do {
3129         i = 0;
3130         if (_Pickler_Write(self, &mark_op, 1) < 0)
3131             return -1;
3132         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3133             if (save(self, item, 0) < 0)
3134                 return -1;
3135             if (++i == BATCHSIZE)
3136                 break;
3137         }
3138         if (_Pickler_Write(self, &additems_op, 1) < 0)
3139             return -1;
3140         if (PySet_GET_SIZE(obj) != set_size) {
3141             PyErr_Format(
3142                 PyExc_RuntimeError,
3143                 "set changed size during iteration");
3144             return -1;
3145         }
3146     } while (i == BATCHSIZE);
3147 
3148     return 0;
3149 }
3150 
3151 static int
save_frozenset(PicklerObject * self,PyObject * obj)3152 save_frozenset(PicklerObject *self, PyObject *obj)
3153 {
3154     PyObject *iter;
3155 
3156     const char mark_op = MARK;
3157     const char frozenset_op = FROZENSET;
3158 
3159     if (self->fast && !fast_save_enter(self, obj))
3160         return -1;
3161 
3162     if (self->proto < 4) {
3163         PyObject *items;
3164         PyObject *reduce_value;
3165         int status;
3166 
3167         items = PySequence_List(obj);
3168         if (items == NULL) {
3169             return -1;
3170         }
3171         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3172                                      items);
3173         Py_DECREF(items);
3174         if (reduce_value == NULL) {
3175             return -1;
3176         }
3177         /* save_reduce() will memoize the object automatically. */
3178         status = save_reduce(self, reduce_value, obj);
3179         Py_DECREF(reduce_value);
3180         return status;
3181     }
3182 
3183     if (_Pickler_Write(self, &mark_op, 1) < 0)
3184         return -1;
3185 
3186     iter = PyObject_GetIter(obj);
3187     if (iter == NULL) {
3188         return -1;
3189     }
3190     for (;;) {
3191         PyObject *item;
3192 
3193         item = PyIter_Next(iter);
3194         if (item == NULL) {
3195             if (PyErr_Occurred()) {
3196                 Py_DECREF(iter);
3197                 return -1;
3198             }
3199             break;
3200         }
3201         if (save(self, item, 0) < 0) {
3202             Py_DECREF(item);
3203             Py_DECREF(iter);
3204             return -1;
3205         }
3206         Py_DECREF(item);
3207     }
3208     Py_DECREF(iter);
3209 
3210     /* If the object is already in the memo, this means it is
3211        recursive. In this case, throw away everything we put on the
3212        stack, and fetch the object back from the memo. */
3213     if (PyMemoTable_Get(self->memo, obj)) {
3214         const char pop_mark_op = POP_MARK;
3215 
3216         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3217             return -1;
3218         if (memo_get(self, obj) < 0)
3219             return -1;
3220         return 0;
3221     }
3222 
3223     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3224         return -1;
3225     if (memo_put(self, obj) < 0)
3226         return -1;
3227 
3228     return 0;
3229 }
3230 
3231 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3232 fix_imports(PyObject **module_name, PyObject **global_name)
3233 {
3234     PyObject *key;
3235     PyObject *item;
3236     PickleState *st = _Pickle_GetGlobalState();
3237 
3238     key = PyTuple_Pack(2, *module_name, *global_name);
3239     if (key == NULL)
3240         return -1;
3241     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3242     Py_DECREF(key);
3243     if (item) {
3244         PyObject *fixed_module_name;
3245         PyObject *fixed_global_name;
3246 
3247         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3248             PyErr_Format(PyExc_RuntimeError,
3249                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3250                          "should be 2-tuples, not %.200s",
3251                          Py_TYPE(item)->tp_name);
3252             return -1;
3253         }
3254         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3255         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3256         if (!PyUnicode_Check(fixed_module_name) ||
3257             !PyUnicode_Check(fixed_global_name)) {
3258             PyErr_Format(PyExc_RuntimeError,
3259                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3260                          "should be pairs of str, not (%.200s, %.200s)",
3261                          Py_TYPE(fixed_module_name)->tp_name,
3262                          Py_TYPE(fixed_global_name)->tp_name);
3263             return -1;
3264         }
3265 
3266         Py_CLEAR(*module_name);
3267         Py_CLEAR(*global_name);
3268         Py_INCREF(fixed_module_name);
3269         Py_INCREF(fixed_global_name);
3270         *module_name = fixed_module_name;
3271         *global_name = fixed_global_name;
3272         return 0;
3273     }
3274     else if (PyErr_Occurred()) {
3275         return -1;
3276     }
3277 
3278     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3279     if (item) {
3280         if (!PyUnicode_Check(item)) {
3281             PyErr_Format(PyExc_RuntimeError,
3282                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3283                          "should be strings, not %.200s",
3284                          Py_TYPE(item)->tp_name);
3285             return -1;
3286         }
3287         Py_INCREF(item);
3288         Py_XSETREF(*module_name, item);
3289     }
3290     else if (PyErr_Occurred()) {
3291         return -1;
3292     }
3293 
3294     return 0;
3295 }
3296 
3297 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3298 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3299 {
3300     PyObject *global_name = NULL;
3301     PyObject *module_name = NULL;
3302     PyObject *module = NULL;
3303     PyObject *parent = NULL;
3304     PyObject *dotted_path = NULL;
3305     PyObject *lastname = NULL;
3306     PyObject *cls;
3307     PickleState *st = _Pickle_GetGlobalState();
3308     int status = 0;
3309     _Py_IDENTIFIER(__name__);
3310     _Py_IDENTIFIER(__qualname__);
3311 
3312     const char global_op = GLOBAL;
3313 
3314     if (name) {
3315         Py_INCREF(name);
3316         global_name = name;
3317     }
3318     else {
3319         if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3320             goto error;
3321         if (global_name == NULL) {
3322             global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3323             if (global_name == NULL)
3324                 goto error;
3325         }
3326     }
3327 
3328     dotted_path = get_dotted_path(module, global_name);
3329     if (dotted_path == NULL)
3330         goto error;
3331     module_name = whichmodule(obj, dotted_path);
3332     if (module_name == NULL)
3333         goto error;
3334 
3335     /* XXX: Change to use the import C API directly with level=0 to disallow
3336        relative imports.
3337 
3338        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3339        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3340        custom import functions (IMHO, this would be a nice security
3341        feature). The import C API would need to be extended to support the
3342        extra parameters of __import__ to fix that. */
3343     module = PyImport_Import(module_name);
3344     if (module == NULL) {
3345         PyErr_Format(st->PicklingError,
3346                      "Can't pickle %R: import of module %R failed",
3347                      obj, module_name);
3348         goto error;
3349     }
3350     lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3351     Py_INCREF(lastname);
3352     cls = get_deep_attribute(module, dotted_path, &parent);
3353     Py_CLEAR(dotted_path);
3354     if (cls == NULL) {
3355         PyErr_Format(st->PicklingError,
3356                      "Can't pickle %R: attribute lookup %S on %S failed",
3357                      obj, global_name, module_name);
3358         goto error;
3359     }
3360     if (cls != obj) {
3361         Py_DECREF(cls);
3362         PyErr_Format(st->PicklingError,
3363                      "Can't pickle %R: it's not the same object as %S.%S",
3364                      obj, module_name, global_name);
3365         goto error;
3366     }
3367     Py_DECREF(cls);
3368 
3369     if (self->proto >= 2) {
3370         /* See whether this is in the extension registry, and if
3371          * so generate an EXT opcode.
3372          */
3373         PyObject *extension_key;
3374         PyObject *code_obj;      /* extension code as Python object */
3375         long code;               /* extension code as C value */
3376         char pdata[5];
3377         Py_ssize_t n;
3378 
3379         extension_key = PyTuple_Pack(2, module_name, global_name);
3380         if (extension_key == NULL) {
3381             goto error;
3382         }
3383         code_obj = PyDict_GetItemWithError(st->extension_registry,
3384                                            extension_key);
3385         Py_DECREF(extension_key);
3386         /* The object is not registered in the extension registry.
3387            This is the most likely code path. */
3388         if (code_obj == NULL) {
3389             if (PyErr_Occurred()) {
3390                 goto error;
3391             }
3392             goto gen_global;
3393         }
3394 
3395         /* XXX: pickle.py doesn't check neither the type, nor the range
3396            of the value returned by the extension_registry. It should for
3397            consistency. */
3398 
3399         /* Verify code_obj has the right type and value. */
3400         if (!PyLong_Check(code_obj)) {
3401             PyErr_Format(st->PicklingError,
3402                          "Can't pickle %R: extension code %R isn't an integer",
3403                          obj, code_obj);
3404             goto error;
3405         }
3406         code = PyLong_AS_LONG(code_obj);
3407         if (code <= 0 || code > 0x7fffffffL) {
3408             if (!PyErr_Occurred())
3409                 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3410                              "code %ld is out of range", obj, code);
3411             goto error;
3412         }
3413 
3414         /* Generate an EXT opcode. */
3415         if (code <= 0xff) {
3416             pdata[0] = EXT1;
3417             pdata[1] = (unsigned char)code;
3418             n = 2;
3419         }
3420         else if (code <= 0xffff) {
3421             pdata[0] = EXT2;
3422             pdata[1] = (unsigned char)(code & 0xff);
3423             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3424             n = 3;
3425         }
3426         else {
3427             pdata[0] = EXT4;
3428             pdata[1] = (unsigned char)(code & 0xff);
3429             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3430             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3431             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3432             n = 5;
3433         }
3434 
3435         if (_Pickler_Write(self, pdata, n) < 0)
3436             goto error;
3437     }
3438     else {
3439   gen_global:
3440         if (parent == module) {
3441             Py_INCREF(lastname);
3442             Py_DECREF(global_name);
3443             global_name = lastname;
3444         }
3445         if (self->proto >= 4) {
3446             const char stack_global_op = STACK_GLOBAL;
3447 
3448             if (save(self, module_name, 0) < 0)
3449                 goto error;
3450             if (save(self, global_name, 0) < 0)
3451                 goto error;
3452 
3453             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3454                 goto error;
3455         }
3456         else if (parent != module) {
3457             PickleState *st = _Pickle_GetGlobalState();
3458             PyObject *reduce_value = Py_BuildValue("(O(OO))",
3459                                         st->getattr, parent, lastname);
3460             if (reduce_value == NULL)
3461                 goto error;
3462             status = save_reduce(self, reduce_value, NULL);
3463             Py_DECREF(reduce_value);
3464             if (status < 0)
3465                 goto error;
3466         }
3467         else {
3468             /* Generate a normal global opcode if we are using a pickle
3469                protocol < 4, or if the object is not registered in the
3470                extension registry. */
3471             PyObject *encoded;
3472             PyObject *(*unicode_encoder)(PyObject *);
3473 
3474             if (_Pickler_Write(self, &global_op, 1) < 0)
3475                 goto error;
3476 
3477             /* For protocol < 3 and if the user didn't request against doing
3478                so, we convert module names to the old 2.x module names. */
3479             if (self->proto < 3 && self->fix_imports) {
3480                 if (fix_imports(&module_name, &global_name) < 0) {
3481                     goto error;
3482                 }
3483             }
3484 
3485             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3486                both the module name and the global name using UTF-8. We do so
3487                only when we are using the pickle protocol newer than version
3488                3. This is to ensure compatibility with older Unpickler running
3489                on Python 2.x. */
3490             if (self->proto == 3) {
3491                 unicode_encoder = PyUnicode_AsUTF8String;
3492             }
3493             else {
3494                 unicode_encoder = PyUnicode_AsASCIIString;
3495             }
3496             encoded = unicode_encoder(module_name);
3497             if (encoded == NULL) {
3498                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3499                     PyErr_Format(st->PicklingError,
3500                                  "can't pickle module identifier '%S' using "
3501                                  "pickle protocol %i",
3502                                  module_name, self->proto);
3503                 goto error;
3504             }
3505             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3506                                PyBytes_GET_SIZE(encoded)) < 0) {
3507                 Py_DECREF(encoded);
3508                 goto error;
3509             }
3510             Py_DECREF(encoded);
3511             if(_Pickler_Write(self, "\n", 1) < 0)
3512                 goto error;
3513 
3514             /* Save the name of the module. */
3515             encoded = unicode_encoder(global_name);
3516             if (encoded == NULL) {
3517                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3518                     PyErr_Format(st->PicklingError,
3519                                  "can't pickle global identifier '%S' using "
3520                                  "pickle protocol %i",
3521                                  global_name, self->proto);
3522                 goto error;
3523             }
3524             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3525                                PyBytes_GET_SIZE(encoded)) < 0) {
3526                 Py_DECREF(encoded);
3527                 goto error;
3528             }
3529             Py_DECREF(encoded);
3530             if (_Pickler_Write(self, "\n", 1) < 0)
3531                 goto error;
3532         }
3533         /* Memoize the object. */
3534         if (memo_put(self, obj) < 0)
3535             goto error;
3536     }
3537 
3538     if (0) {
3539   error:
3540         status = -1;
3541     }
3542     Py_XDECREF(module_name);
3543     Py_XDECREF(global_name);
3544     Py_XDECREF(module);
3545     Py_XDECREF(parent);
3546     Py_XDECREF(dotted_path);
3547     Py_XDECREF(lastname);
3548 
3549     return status;
3550 }
3551 
3552 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3553 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3554 {
3555     PyObject *reduce_value;
3556     int status;
3557 
3558     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3559     if (reduce_value == NULL) {
3560         return -1;
3561     }
3562     status = save_reduce(self, reduce_value, obj);
3563     Py_DECREF(reduce_value);
3564     return status;
3565 }
3566 
3567 static int
save_type(PicklerObject * self,PyObject * obj)3568 save_type(PicklerObject *self, PyObject *obj)
3569 {
3570     if (obj == (PyObject *)&_PyNone_Type) {
3571         return save_singleton_type(self, obj, Py_None);
3572     }
3573     else if (obj == (PyObject *)&PyEllipsis_Type) {
3574         return save_singleton_type(self, obj, Py_Ellipsis);
3575     }
3576     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3577         return save_singleton_type(self, obj, Py_NotImplemented);
3578     }
3579     return save_global(self, obj, NULL);
3580 }
3581 
3582 static int
save_pers(PicklerObject * self,PyObject * obj)3583 save_pers(PicklerObject *self, PyObject *obj)
3584 {
3585     PyObject *pid = NULL;
3586     int status = 0;
3587 
3588     const char persid_op = PERSID;
3589     const char binpersid_op = BINPERSID;
3590 
3591     pid = call_method(self->pers_func, self->pers_func_self, obj);
3592     if (pid == NULL)
3593         return -1;
3594 
3595     if (pid != Py_None) {
3596         if (self->bin) {
3597             if (save(self, pid, 1) < 0 ||
3598                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3599                 goto error;
3600         }
3601         else {
3602             PyObject *pid_str;
3603 
3604             pid_str = PyObject_Str(pid);
3605             if (pid_str == NULL)
3606                 goto error;
3607 
3608             /* XXX: Should it check whether the pid contains embedded
3609                newlines? */
3610             if (!PyUnicode_IS_ASCII(pid_str)) {
3611                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3612                                 "persistent IDs in protocol 0 must be "
3613                                 "ASCII strings");
3614                 Py_DECREF(pid_str);
3615                 goto error;
3616             }
3617 
3618             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3619                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3620                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3621                 _Pickler_Write(self, "\n", 1) < 0) {
3622                 Py_DECREF(pid_str);
3623                 goto error;
3624             }
3625             Py_DECREF(pid_str);
3626         }
3627         status = 1;
3628     }
3629 
3630     if (0) {
3631   error:
3632         status = -1;
3633     }
3634     Py_XDECREF(pid);
3635 
3636     return status;
3637 }
3638 
3639 static PyObject *
get_class(PyObject * obj)3640 get_class(PyObject *obj)
3641 {
3642     PyObject *cls;
3643     _Py_IDENTIFIER(__class__);
3644 
3645     if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3646         cls = (PyObject *) Py_TYPE(obj);
3647         Py_INCREF(cls);
3648     }
3649     return cls;
3650 }
3651 
/* We're saving obj, and args is the 2-thru-5 tuple returned by the
 * appropriate __reduce__ method for obj.
 *
 * args unpacks as (callable, argtup[, state[, listitems[, dictitems]]]);
 * a None in any of the three optional slots is treated as "absent".
 *
 * The object construction is emitted in one of three forms:
 *   - NEWOBJ_EX (or, below protocol 4, a REDUCE of a partial object) when
 *     the callable is named "__newobj_ex__" and the protocol is >= 2;
 *   - NEWOBJ when the callable is named "__newobj__" and the protocol
 *     is >= 2;
 *   - REDUCE otherwise.
 * After that, obj is memoized (unless obj is NULL), list and dict items
 * are batched, and the state, if any, is saved followed by BUILD.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    PickleState *st = _Pickle_GetGlobalState();
    Py_ssize_t size;
    int use_newobj = 0, use_newobj_ex = 0;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;
    const char newobj_ex_op = NEWOBJ_EX;

    /* Validate the tuple length up front so the caller gets a
       PicklingError with a pickle-specific message. */
    size = PyTuple_Size(args);
    if (size < 2 || size > 5) {
        PyErr_SetString(st->PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 5 elements");
        return -1;
    }

    /* The unpacked items are not INCREF'ed or DECREF'ed anywhere in this
       function. */
    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
                           &callable, &argtup, &state, &listitems, &dictitems))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(st->PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(st->PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* Normalize None to NULL for the optional items, so the code below only
       has to test for NULL. */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(st->PicklingError, "fourth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(st->PicklingError, "fifth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    /* From protocol 2 on, the callable's __name__ selects the NEWOBJ_EX or
       NEWOBJ code paths.  __newobj_ex__ is tested first; __newobj__ is only
       tested when that comparison fails. */
    if (self->proto >= 2) {
        PyObject *name;
        _Py_IDENTIFIER(__name__);

        if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
            return -1;
        }
        if (name != NULL && PyUnicode_Check(name)) {
            _Py_IDENTIFIER(__newobj_ex__);
            use_newobj_ex = _PyUnicode_EqualToASCIIId(
                    name, &PyId___newobj_ex__);
            if (!use_newobj_ex) {
                _Py_IDENTIFIER(__newobj__);
                use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
            }
        }
        Py_XDECREF(name);
    }

    if (use_newobj_ex) {
        PyObject *cls;
        /* NOTE: this local shadows the function parameter `args` for the
           rest of this branch; here it is the positional-argument tuple
           taken from argtup[1]. */
        PyObject *args;
        PyObject *kwargs;

        /* argtup must be exactly (cls, args, kwargs). */
        if (PyTuple_GET_SIZE(argtup) != 3) {
            PyErr_Format(st->PicklingError,
                         "length of the NEWOBJ_EX argument tuple must be "
                         "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_Format(st->PicklingError,
                         "first item from NEWOBJ_EX argument tuple must "
                         "be a class, not %.200s", Py_TYPE(cls)->tp_name);
            return -1;
        }
        args = PyTuple_GET_ITEM(argtup, 1);
        if (!PyTuple_Check(args)) {
            PyErr_Format(st->PicklingError,
                         "second item from NEWOBJ_EX argument tuple must "
                         "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
            return -1;
        }
        kwargs = PyTuple_GET_ITEM(argtup, 2);
        if (!PyDict_Check(kwargs)) {
            PyErr_Format(st->PicklingError,
                         "third item from NEWOBJ_EX argument tuple must "
                         "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
            return -1;
        }

        if (self->proto >= 4) {
            /* Protocol 4 and up: save cls, args and kwargs, then emit the
               NEWOBJ_EX opcode. */
            if (save(self, cls, 0) < 0 ||
                save(self, args, 0) < 0 ||
                save(self, kwargs, 0) < 0 ||
                _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
                return -1;
            }
        }
        else {
            /* Below protocol 4 the NEWOBJ_EX opcode is not used.  Instead,
               st->partial is called with (cls.__new__, cls, *args) and
               **kwargs, and the result is pickled as a REDUCE with an empty
               argument tuple.  (st->partial is presumably functools.partial
               -- it is set up outside this chunk.) */
            PyObject *newargs;
            PyObject *cls_new;
            Py_ssize_t i;
            _Py_IDENTIFIER(__new__);

            newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
            if (newargs == NULL)
                return -1;

            cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
            if (cls_new == NULL) {
                Py_DECREF(newargs);
                return -1;
            }
            /* PyTuple_SET_ITEM takes over a reference: cls_new is handed
               over as-is, while cls and the items of args are INCREF'ed
               first. */
            PyTuple_SET_ITEM(newargs, 0, cls_new);
            Py_INCREF(cls);
            PyTuple_SET_ITEM(newargs, 1, cls);
            for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
                PyObject *item = PyTuple_GET_ITEM(args, i);
                Py_INCREF(item);
                PyTuple_SET_ITEM(newargs, i + 2, item);
            }

            /* From here on `callable` holds a reference owned by this
               branch; it is released on every exit path below. */
            callable = PyObject_Call(st->partial, newargs, kwargs);
            Py_DECREF(newargs);
            if (callable == NULL)
                return -1;

            newargs = PyTuple_New(0);
            if (newargs == NULL) {
                Py_DECREF(callable);
                return -1;
            }

            if (save(self, callable, 0) < 0 ||
                save(self, newargs, 0) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0) {
                Py_DECREF(newargs);
                Py_DECREF(callable);
                return -1;
            }
            Py_DECREF(newargs);
            Py_DECREF(callable);
        }
    }
    else if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (PyTuple_GET_SIZE(argtup) < 1) {
            PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_SetString(st->PicklingError, "args[0] from "
                            "__newobj__ args is not a type");
            return -1;
        }

        /* When the object itself is known, its class must be the very same
           object as the class passed to __newobj__. */
        if (obj != NULL) {
            obj_class = get_class(obj);
            if (obj_class == NULL) {
                return -1;
            }
            p = obj_class != cls;
            Py_DECREF(obj_class);
            if (p) {
                PyErr_SetString(st->PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls to save() are prone to infinite recursion.
           Imagine what happens if the value returned by the __reduce__()
           method of some extension type contains another object of the same
           type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RecursionError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargtup (the 2nd item) recursion
           ensued. Of course, the bug was in the complex class which had a
           broken __getnewargs__() that emitted another complex object. But
           the point here is that it is quite easy to end up with a broken
           reduce function. */

        /* Save the class and its __new__ arguments. */
        if (save(self, cls, 0) < 0)
            return -1;

        /* Everything after the class (argtup[1:]) is the argument tuple
           for __new__. */
        newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(self, callable, 0) < 0 ||
            save(self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller does not want to memoize the object. Not particularly
       useful, but that is to mimic the behavior of save_reduce() in
       pickle.py when obj is None. */
    if (obj != NULL) {
        /* If the object is already in the memo, this means it is
           recursive. In this case, throw away everything we put on the
           stack, and fetch the object back from the memo. */
        if (PyMemoTable_Get(self->memo, obj)) {
            const char pop_op = POP;

            if (_Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else if (memo_put(self, obj) < 0)
            return -1;
    }

    /* Optional extras, in order: list items, dict items, then the state
       followed by BUILD. */
    if (listitems && batch_list(self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(self, dictitems) < 0)
        return -1;

    if (state) {
        if (save(self, state, 0) < 0 ||
            _Pickler_Write(self, &build_op, 1) < 0)
            return -1;
    }

    return 0;
}
3941 
/* Pickle a single object: the central dispatch routine of the pickler.
 *
 * self:      the pickler doing the work.
 * obj:       the object to write to the pickle stream.
 * pers_save: non-zero to skip the persistent-id hook for this call (used
 *            when saving the persistent id itself).
 *
 * Dispatch order: persistent id hook, atoms (None, bool, int, float), memo
 * lookup, exact built-in container/str/bytes/type/function types, and
 * finally the reduce machinery (dispatch tables, __reduce_ex__,
 * __reduce__).
 *
 * Returns a negative value on failure.  On success the result is
 * non-negative: normally the status of the helper that handled obj, and 1
 * when save_pers() wrote a persistent id.
 */
static int
save(PicklerObject *self, PyObject *obj, int pers_save)
{
    PyTypeObject *type;
    PyObject *reduce_func = NULL;
    PyObject *reduce_value = NULL;
    int status = 0;

    if (_Pickler_OpcodeBoundary(self) < 0)
        return -1;

    /* Plain return here: the recursion counter has not been entered, so the
       `done` label (which leaves it) must not be reached. */
    if (Py_EnterRecursiveCall(" while pickling an object"))
        return -1;

    /* The extra pers_save argument is necessary to avoid calling save_pers()
       on its returned object. */
    if (!pers_save && self->pers_func) {
        /* save_pers() returns:
            -1   to signal an error;
             0   if it did nothing successfully;
             1   if a persistent id was saved.
         */
        if ((status = save_pers(self, obj)) != 0)
            goto done;
    }

    type = Py_TYPE(obj);

    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */

    /* Atom types; these aren't memoized, so don't check the memo. */

    if (obj == Py_None) {
        status = save_none(self, obj);
        goto done;
    }
    else if (obj == Py_False || obj == Py_True) {
        status = save_bool(self, obj);
        goto done;
    }
    else if (type == &PyLong_Type) {
        status = save_long(self, obj);
        goto done;
    }
    else if (type == &PyFloat_Type) {
        status = save_float(self, obj);
        goto done;
    }

    /* Check the memo to see if it has the object. If so, generate
       a GET (or BINGET) opcode, instead of pickling the object
       once again. */
    if (PyMemoTable_Get(self->memo, obj)) {
        if (memo_get(self, obj) < 0)
            goto error;
        goto done;
    }

    /* Exact-type matches only: subclasses of these types fall through to
       the reduce machinery below. */
    if (type == &PyBytes_Type) {
        status = save_bytes(self, obj);
        goto done;
    }
    else if (type == &PyUnicode_Type) {
        status = save_unicode(self, obj);
        goto done;
    }
    else if (type == &PyDict_Type) {
        status = save_dict(self, obj);
        goto done;
    }
    else if (type == &PySet_Type) {
        status = save_set(self, obj);
        goto done;
    }
    else if (type == &PyFrozenSet_Type) {
        status = save_frozenset(self, obj);
        goto done;
    }
    else if (type == &PyList_Type) {
        status = save_list(self, obj);
        goto done;
    }
    else if (type == &PyTuple_Type) {
        status = save_tuple(self, obj);
        goto done;
    }
    else if (type == &PyType_Type) {
        status = save_type(self, obj);
        goto done;
    }
    else if (type == &PyFunction_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }

    /* XXX: This part needs some unit tests. */

    /* Get a reduction callable, and call it.  This may come from
     * self.dispatch_table, copyreg.dispatch_table, the object's
     * __reduce_ex__ method, or the object's __reduce__ method.
     */
    if (self->dispatch_table == NULL) {
        /* No per-pickler table: consult the global one, which is accessed
           through the dict API. */
        PickleState *st = _Pickle_GetGlobalState();
        reduce_func = PyDict_GetItemWithError(st->dispatch_table,
                                              (PyObject *)type);
        if (reduce_func == NULL) {
            if (PyErr_Occurred()) {
                goto error;
            }
        } else {
            /* PyDict_GetItemWithError() returns a borrowed reference.
               Increase the reference count to be consistent with
               PyObject_GetItem and _PyObject_GetAttrId used below. */
            Py_INCREF(reduce_func);
        }
    } else {
        /* The per-pickler table is accessed through the generic item
           protocol; a KeyError simply means "no entry for this type". */
        reduce_func = PyObject_GetItem(self->dispatch_table,
                                       (PyObject *)type);
        if (reduce_func == NULL) {
            if (PyErr_ExceptionMatches(PyExc_KeyError))
                PyErr_Clear();
            else
                goto error;
        }
    }
    if (reduce_func != NULL) {
        /* NOTE(review): the extra reference taken here is never released in
           this function, so _Pickle_FastCall presumably consumes a
           reference to its argument -- confirm against its definition. */
        Py_INCREF(obj);
        reduce_value = _Pickle_FastCall(reduce_func, obj);
    }
    else if (PyType_IsSubtype(type, &PyType_Type)) {
        /* obj is an instance of a metaclass (its type derives from `type`),
           i.e. a class: pickle it as a global reference. */
        status = save_global(self, obj, NULL);
        goto done;
    }
    else {
        _Py_IDENTIFIER(__reduce__);
        _Py_IDENTIFIER(__reduce_ex__);


        /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
           automatically defined as __reduce__. While this is convenient,
           it makes it impossible to know which method was actually called.
           Of course, this is not a big deal. But still, it would be nice to
           let the user know which method was called when something goes
           wrong. Incidentally, this means if __reduce_ex__ is not defined,
           we don't actually have to check for a __reduce__ method. */

        /* Check for a __reduce_ex__ method. */
        if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
            goto error;
        }
        if (reduce_func != NULL) {
            PyObject *proto;
            proto = PyLong_FromLong(self->proto);
            /* If proto could not be created, reduce_value stays NULL and
               the check after this if/else chain jumps to the error path. */
            if (proto != NULL) {
                reduce_value = _Pickle_FastCall(reduce_func, proto);
            }
        }
        else {
            PickleState *st = _Pickle_GetGlobalState();

            /* Check for a __reduce__ method. */
            reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
            if (reduce_func != NULL) {
                reduce_value = _PyObject_CallNoArg(reduce_func);
            }
            else {
                /* Whatever the attribute lookup raised is replaced by a
                   PicklingError naming the offending object. */
                PyErr_Format(st->PicklingError,
                             "can't pickle '%.200s' object: %R",
                             type->tp_name, obj);
                goto error;
            }
        }
    }

    if (reduce_value == NULL)
        goto error;

    /* A string result is passed to save_global() as the name under which
       obj is pickled. */
    if (PyUnicode_Check(reduce_value)) {
        status = save_global(self, obj, reduce_value);
        goto done;
    }

    if (!PyTuple_Check(reduce_value)) {
        PickleState *st = _Pickle_GetGlobalState();
        PyErr_SetString(st->PicklingError,
                        "__reduce__ must return a string or tuple");
        goto error;
    }

    status = save_reduce(self, reduce_value, obj);

    /* The `error` label lives inside a never-taken `if (0)` so that normal
       control flow skips it and falls through to `done`. */
    if (0) {
  error:
        status = -1;
    }
  done:

    /* Common exit: leave the recursion guard entered above and drop the
       references this function owns. */
    Py_LeaveRecursiveCall();
    Py_XDECREF(reduce_func);
    Py_XDECREF(reduce_value);

    return status;
}
4148 
4149 static int
dump(PicklerObject * self,PyObject * obj)4150 dump(PicklerObject *self, PyObject *obj)
4151 {
4152     const char stop_op = STOP;
4153 
4154     if (self->proto >= 2) {
4155         char header[2];
4156 
4157         header[0] = PROTO;
4158         assert(self->proto >= 0 && self->proto < 256);
4159         header[1] = (unsigned char)self->proto;
4160         if (_Pickler_Write(self, header, 2) < 0)
4161             return -1;
4162         if (self->proto >= 4)
4163             self->framing = 1;
4164     }
4165 
4166     if (save(self, obj, 0) < 0 ||
4167         _Pickler_Write(self, &stop_op, 1) < 0 ||
4168         _Pickler_CommitFrame(self) < 0)
4169         return -1;
4170     self->framing = 0;
4171     return 0;
4172 }
4173 
4174 /*[clinic input]
4175 
4176 _pickle.Pickler.clear_memo
4177 
4178 Clears the pickler's "memo".
4179 
4180 The memo is the data structure that remembers which objects the
4181 pickler has already seen, so that shared or recursive objects are
4182 pickled by reference and not by value.  This method is useful when
4183 re-using picklers.
4184 [clinic start generated code]*/
4185 
4186 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4187 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4188 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4189 {
4190     if (self->memo)
4191         PyMemoTable_Clear(self->memo);
4192 
4193     Py_RETURN_NONE;
4194 }
4195 
4196 /*[clinic input]
4197 
4198 _pickle.Pickler.dump
4199 
4200   obj: object
4201   /
4202 
4203 Write a pickled representation of the given object to the open file.
4204 [clinic start generated code]*/
4205 
4206 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4207 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4208 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4209 {
4210     /* Check whether the Pickler was initialized correctly (issue3664).
4211        Developers often forget to call __init__() in their subclasses, which
4212        would trigger a segfault without this check. */
4213     if (self->write == NULL) {
4214         PickleState *st = _Pickle_GetGlobalState();
4215         PyErr_Format(st->PicklingError,
4216                      "Pickler.__init__() was not called by %s.__init__()",
4217                      Py_TYPE(self)->tp_name);
4218         return NULL;
4219     }
4220 
4221     if (_Pickler_ClearBuffer(self) < 0)
4222         return NULL;
4223 
4224     if (dump(self, obj) < 0)
4225         return NULL;
4226 
4227     if (_Pickler_FlushToFile(self) < 0)
4228         return NULL;
4229 
4230     Py_RETURN_NONE;
4231 }
4232 
4233 /*[clinic input]
4234 
4235 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4236 
4237 Returns size in memory, in bytes.
4238 [clinic start generated code]*/
4239 
4240 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4241 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4242 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4243 {
4244     Py_ssize_t res, s;
4245 
4246     res = _PyObject_SIZE(Py_TYPE(self));
4247     if (self->memo != NULL) {
4248         res += sizeof(PyMemoTable);
4249         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4250     }
4251     if (self->output_buffer != NULL) {
4252         s = _PySys_GetSizeOf(self->output_buffer);
4253         if (s == -1)
4254             return -1;
4255         res += s;
4256     }
4257     return res;
4258 }
4259 
/* Method table for Pickler objects: dump(), clear_memo() and __sizeof__().
 * The *_METHODDEF macros are defined outside this chunk (presumably in the
 * Argument Clinic generated header); each supplies one table entry, which
 * is why the lines carry no trailing commas here.
 */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4266 
/* Deallocator for Pickler objects: untrack from the GC, release every
 * owned object reference, delete the memo table and free the object.
 */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Stop GC tracking first, before the fields are torn down. */
    PyObject_GC_UnTrack(self);

    /* Any of these may still be NULL, hence the X variants. */
    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);

    /* The memo is a PyMemoTable, not a Python object, so it has its own
       destructor. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4282 
/* GC traversal for Pickler objects: reports the write callable, the
 * persistent-id function, the dispatch table and the fast-mode memo to the
 * visitor.  Returns 0 unless a Py_VISIT returns early.
 */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    return 0;
}
4292 
/* Drop every object reference held by the pickler and delete its memo
 * table, leaving all of those fields NULL.  Also called from __init__() to
 * reset a pickler that is being re-initialized.  Always returns 0.
 */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->output_buffer);
    Py_CLEAR(self->write);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->dispatch_table);
    Py_CLEAR(self->fast_memo);

    if (self->memo != NULL) {
        /* Detach the table from the pickler before deleting it, so that
           self->memo never points at a table being torn down. */
        PyMemoTable *memo = self->memo;
        self->memo = NULL;
        PyMemoTable_Del(memo);
    }
    return 0;
}
4309 
4310 
4311 /*[clinic input]
4312 
4313 _pickle.Pickler.__init__
4314 
4315   file: object
4316   protocol: object = NULL
4317   fix_imports: bool = True
4318 
4319 This takes a binary file for writing a pickle data stream.
4320 
4321 The optional *protocol* argument tells the pickler to use the given
4322 protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
4323 protocol is 3; a backward-incompatible protocol designed for Python 3.
4324 
4325 Specifying a negative protocol version selects the highest protocol
4326 version supported.  The higher the protocol used, the more recent the
4327 version of Python needed to read the pickle produced.
4328 
4329 The *file* argument must have a write() method that accepts a single
4330 bytes argument. It can thus be a file object opened for binary
4331 writing, an io.BytesIO instance, or any other custom object that meets
4332 this interface.
4333 
4334 If *fix_imports* is True and protocol is less than 3, pickle will try
4335 to map the new Python 3 names to the old module names used in Python
4336 2, so that the pickle data stream is readable with Python 2.
4337 [clinic start generated code]*/
4338 
4339 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports)4340 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4341                               PyObject *protocol, int fix_imports)
4342 /*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/
4343 {
4344     _Py_IDENTIFIER(persistent_id);
4345     _Py_IDENTIFIER(dispatch_table);
4346 
4347     /* In case of multiple __init__() calls, clear previous content. */
4348     if (self->write != NULL)
4349         (void)Pickler_clear(self);
4350 
4351     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4352         return -1;
4353 
4354     if (_Pickler_SetOutputStream(self, file) < 0)
4355         return -1;
4356 
4357     /* memo and output_buffer may have already been created in _Pickler_New */
4358     if (self->memo == NULL) {
4359         self->memo = PyMemoTable_New();
4360         if (self->memo == NULL)
4361             return -1;
4362     }
4363     self->output_len = 0;
4364     if (self->output_buffer == NULL) {
4365         self->max_output_len = WRITE_BUF_SIZE;
4366         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4367                                                         self->max_output_len);
4368         if (self->output_buffer == NULL)
4369             return -1;
4370     }
4371 
4372     self->fast = 0;
4373     self->fast_nesting = 0;
4374     self->fast_memo = NULL;
4375 
4376     if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4377                         &self->pers_func, &self->pers_func_self) < 0)
4378     {
4379         return -1;
4380     }
4381 
4382     if (_PyObject_LookupAttrId((PyObject *)self,
4383                                     &PyId_dispatch_table, &self->dispatch_table) < 0) {
4384         return -1;
4385     }
4386 
4387     return 0;
4388 }
4389 
4390 
4391 /* Define a proxy object for the Pickler's internal memo object. This is to
4392  * avoid breaking code like:
4393  *  pickler.memo.clear()
4394  * and
4395  *  pickler.memo = saved_memo
4396  * Is this a good idea? Not really, but we don't want to break code that uses
4397  * it. Note that we don't implement the entire mapping API here. This is
4398  * intentional, as these should be treated as black-box implementation details.
4399  */
4400 
4401 /*[clinic input]
4402 _pickle.PicklerMemoProxy.clear
4403 
4404 Remove all items from memo.
4405 [clinic start generated code]*/
4406 
4407 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4408 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4409 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4410 {
4411     if (self->pickler->memo)
4412         PyMemoTable_Clear(self->pickler->memo);
4413     Py_RETURN_NONE;
4414 }
4415 
4416 /*[clinic input]
4417 _pickle.PicklerMemoProxy.copy
4418 
4419 Copy the memo to a new object.
4420 [clinic start generated code]*/
4421 
4422 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4423 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4424 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4425 {
4426     PyMemoTable *memo;
4427     PyObject *new_memo = PyDict_New();
4428     if (new_memo == NULL)
4429         return NULL;
4430 
4431     memo = self->pickler->memo;
4432     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4433         PyMemoEntry entry = memo->mt_table[i];
4434         if (entry.me_key != NULL) {
4435             int status;
4436             PyObject *key, *value;
4437 
4438             key = PyLong_FromVoidPtr(entry.me_key);
4439             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4440 
4441             if (key == NULL || value == NULL) {
4442                 Py_XDECREF(key);
4443                 Py_XDECREF(value);
4444                 goto error;
4445             }
4446             status = PyDict_SetItem(new_memo, key, value);
4447             Py_DECREF(key);
4448             Py_DECREF(value);
4449             if (status < 0)
4450                 goto error;
4451         }
4452     }
4453     return new_memo;
4454 
4455   error:
4456     Py_XDECREF(new_memo);
4457     return NULL;
4458 }
4459 
4460 /*[clinic input]
4461 _pickle.PicklerMemoProxy.__reduce__
4462 
4463 Implement pickle support.
4464 [clinic start generated code]*/
4465 
4466 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4467 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4468 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4469 {
4470     PyObject *reduce_value, *dict_args;
4471     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4472     if (contents == NULL)
4473         return NULL;
4474 
4475     reduce_value = PyTuple_New(2);
4476     if (reduce_value == NULL) {
4477         Py_DECREF(contents);
4478         return NULL;
4479     }
4480     dict_args = PyTuple_New(1);
4481     if (dict_args == NULL) {
4482         Py_DECREF(contents);
4483         Py_DECREF(reduce_value);
4484         return NULL;
4485     }
4486     PyTuple_SET_ITEM(dict_args, 0, contents);
4487     Py_INCREF((PyObject *)&PyDict_Type);
4488     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4489     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4490     return reduce_value;
4491 }
4492 
/* Method table for PicklerMemoProxy objects: clear(), copy() and
 * __reduce__().  The *_METHODDEF macros are defined outside this chunk
 * (presumably in the Argument Clinic generated header); each supplies one
 * table entry.
 */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4499 
/* Deallocator for PicklerMemoProxy objects: untrack from the GC, release
 * the reference to the proxied pickler and free the proxy itself.
 */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    /* pickler may already be NULL if PicklerMemoProxy_clear() ran. */
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4507 
4508 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4509 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4510                           visitproc visit, void *arg)
4511 {
4512     Py_VISIT(self->pickler);
4513     return 0;
4514 }
4515 
4516 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4517 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4518 {
4519     Py_CLEAR(self->pickler);
4520     return 0;
4521 }
4522 
/* Type object for the proxy created by PicklerMemoProxy_New().  Instances
   are GC-tracked, unhashable (PyObject_HashNotImplemented) and expose the
   methods listed in picklerproxy_methods.  Slots after tp_methods are left
   zero-initialized. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4553 
4554 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4555 PicklerMemoProxy_New(PicklerObject *pickler)
4556 {
4557     PicklerMemoProxyObject *self;
4558 
4559     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4560     if (self == NULL)
4561         return NULL;
4562     Py_INCREF(pickler);
4563     self->pickler = pickler;
4564     PyObject_GC_Track(self);
4565     return (PyObject *)self;
4566 }
4567 
4568 /*****************************************************************************/
4569 
4570 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4571 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4572 {
4573     return PicklerMemoProxy_New(self);
4574 }
4575 
4576 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4577 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4578 {
4579     PyMemoTable *new_memo = NULL;
4580 
4581     if (obj == NULL) {
4582         PyErr_SetString(PyExc_TypeError,
4583                         "attribute deletion is not supported");
4584         return -1;
4585     }
4586 
4587     if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4588         PicklerObject *pickler =
4589             ((PicklerMemoProxyObject *)obj)->pickler;
4590 
4591         new_memo = PyMemoTable_Copy(pickler->memo);
4592         if (new_memo == NULL)
4593             return -1;
4594     }
4595     else if (PyDict_Check(obj)) {
4596         Py_ssize_t i = 0;
4597         PyObject *key, *value;
4598 
4599         new_memo = PyMemoTable_New();
4600         if (new_memo == NULL)
4601             return -1;
4602 
4603         while (PyDict_Next(obj, &i, &key, &value)) {
4604             Py_ssize_t memo_id;
4605             PyObject *memo_obj;
4606 
4607             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4608                 PyErr_SetString(PyExc_TypeError,
4609                                 "'memo' values must be 2-item tuples");
4610                 goto error;
4611             }
4612             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4613             if (memo_id == -1 && PyErr_Occurred())
4614                 goto error;
4615             memo_obj = PyTuple_GET_ITEM(value, 1);
4616             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4617                 goto error;
4618         }
4619     }
4620     else {
4621         PyErr_Format(PyExc_TypeError,
4622                      "'memo' attribute must be a PicklerMemoProxy object "
4623                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
4624         return -1;
4625     }
4626 
4627     PyMemoTable_Del(self->memo);
4628     self->memo = new_memo;
4629 
4630     return 0;
4631 
4632   error:
4633     if (new_memo)
4634         PyMemoTable_Del(new_memo);
4635     return -1;
4636 }
4637 
4638 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))4639 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
4640 {
4641     if (self->pers_func == NULL) {
4642         PyErr_SetString(PyExc_AttributeError, "persistent_id");
4643         return NULL;
4644     }
4645     return reconstruct_method(self->pers_func, self->pers_func_self);
4646 }
4647 
4648 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))4649 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
4650 {
4651     if (value == NULL) {
4652         PyErr_SetString(PyExc_TypeError,
4653                         "attribute deletion is not supported");
4654         return -1;
4655     }
4656     if (!PyCallable_Check(value)) {
4657         PyErr_SetString(PyExc_TypeError,
4658                         "persistent_id must be a callable taking one argument");
4659         return -1;
4660     }
4661 
4662     self->pers_func_self = NULL;
4663     Py_INCREF(value);
4664     Py_XSETREF(self->pers_func, value);
4665 
4666     return 0;
4667 }
4668 
/* Attributes of Pickler objects that map directly onto PicklerObject
   struct fields: two C ints ("bin", "fast") and one object slot
   ("dispatch_table", declared T_OBJECT_EX). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}
};
4675 
/* Computed attributes of Pickler objects; each entry pairs a getter with
   the setter that validates and stores a new value. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
4683 
/* Type object for _pickle.Pickler.  The type is subclassable and
   GC-enabled; its methods, members and getsets come from Pickler_methods,
   Pickler_members and Pickler_getsets, and instances are initialized by
   _pickle_Pickler___init__. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Pickler___init__,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4726 
4727 /* Temporary helper for calling self.find_class().
4728 
4729    XXX: It would be nice to able to avoid Python function call overhead, by
4730    using directly the C version of find_class(), when find_class() is not
4731    overridden by a subclass. Although, this could become rather hackish. A
4732    simpler optimization would be to call the C function when self is not a
4733    subclass instance. */
4734 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)4735 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4736 {
4737     _Py_IDENTIFIER(find_class);
4738 
4739     return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
4740                                          module_name, global_name, NULL);
4741 }
4742 
4743 static Py_ssize_t
marker(UnpicklerObject * self)4744 marker(UnpicklerObject *self)
4745 {
4746     Py_ssize_t mark;
4747 
4748     if (self->num_marks < 1) {
4749         PickleState *st = _Pickle_GetGlobalState();
4750         PyErr_SetString(st->UnpicklingError, "could not find MARK");
4751         return -1;
4752     }
4753 
4754     mark = self->marks[--self->num_marks];
4755     self->stack->mark_set = self->num_marks != 0;
4756     self->stack->fence = self->num_marks ?
4757             self->marks[self->num_marks - 1] : 0;
4758     return mark;
4759 }
4760 
4761 static int
load_none(UnpicklerObject * self)4762 load_none(UnpicklerObject *self)
4763 {
4764     PDATA_APPEND(self->stack, Py_None, -1);
4765     return 0;
4766 }
4767 
4768 static int
load_int(UnpicklerObject * self)4769 load_int(UnpicklerObject *self)
4770 {
4771     PyObject *value;
4772     char *endptr, *s;
4773     Py_ssize_t len;
4774     long x;
4775 
4776     if ((len = _Unpickler_Readline(self, &s)) < 0)
4777         return -1;
4778     if (len < 2)
4779         return bad_readline();
4780 
4781     errno = 0;
4782     /* XXX: Should the base argument of strtol() be explicitly set to 10?
4783        XXX(avassalotti): Should this uses PyOS_strtol()? */
4784     x = strtol(s, &endptr, 0);
4785 
4786     if (errno || (*endptr != '\n' && *endptr != '\0')) {
4787         /* Hm, maybe we've got something long.  Let's try reading
4788          * it as a Python int object. */
4789         errno = 0;
4790         /* XXX: Same thing about the base here. */
4791         value = PyLong_FromString(s, NULL, 0);
4792         if (value == NULL) {
4793             PyErr_SetString(PyExc_ValueError,
4794                             "could not convert string to int");
4795             return -1;
4796         }
4797     }
4798     else {
4799         if (len == 3 && (x == 0 || x == 1)) {
4800             if ((value = PyBool_FromLong(x)) == NULL)
4801                 return -1;
4802         }
4803         else {
4804             if ((value = PyLong_FromLong(x)) == NULL)
4805                 return -1;
4806         }
4807     }
4808 
4809     PDATA_PUSH(self->stack, value, -1);
4810     return 0;
4811 }
4812 
4813 static int
load_bool(UnpicklerObject * self,PyObject * boolean)4814 load_bool(UnpicklerObject *self, PyObject *boolean)
4815 {
4816     assert(boolean == Py_True || boolean == Py_False);
4817     PDATA_APPEND(self->stack, boolean, -1);
4818     return 0;
4819 }
4820 
4821 /* s contains x bytes of an unsigned little-endian integer.  Return its value
4822  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4823  */
4824 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)4825 calc_binsize(char *bytes, int nbytes)
4826 {
4827     unsigned char *s = (unsigned char *)bytes;
4828     int i;
4829     size_t x = 0;
4830 
4831     if (nbytes > (int)sizeof(size_t)) {
4832         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
4833          * have 64-bit size that can't be represented on 32-bit platform.
4834          */
4835         for (i = (int)sizeof(size_t); i < nbytes; i++) {
4836             if (s[i])
4837                 return -1;
4838         }
4839         nbytes = (int)sizeof(size_t);
4840     }
4841     for (i = 0; i < nbytes; i++) {
4842         x |= (size_t) s[i] << (8 * i);
4843     }
4844 
4845     if (x > PY_SSIZE_T_MAX)
4846         return -1;
4847     else
4848         return (Py_ssize_t) x;
4849 }
4850 
/* Decode the 'nbytes' little-endian bytes at 'bytes' and return the value
 * as a C long.
 *
 * Obscure: when nbytes is 1 or 2 the value is unsigned, but when nbytes
 * is 4 it is a signed two's-complement integer (BININT is, more
 * accurately, a signed BININT4).  This is a historical source of
 * cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no signed left
 * shift can reach the sign bit, which would be undefined behaviour on
 * platforms where long is 32 bits wide.  The sign of a 4-byte value is
 * then applied arithmetically, so the result does not depend on the
 * width of long.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        /* Negative 32-bit value: the result is x - 2**32, computed as
         * -(2**32 - 1 - x) - 1 so that every intermediate value fits in
         * a long, including the case where the result is LONG_MIN.
         */
        return -(long)(~x & 0xFFFFFFFFUL) - 1;
    }

    return (long)x;
}
4877 
4878 static int
load_binintx(UnpicklerObject * self,char * s,int size)4879 load_binintx(UnpicklerObject *self, char *s, int size)
4880 {
4881     PyObject *value;
4882     long x;
4883 
4884     x = calc_binint(s, size);
4885 
4886     if ((value = PyLong_FromLong(x)) == NULL)
4887         return -1;
4888 
4889     PDATA_PUSH(self->stack, value, -1);
4890     return 0;
4891 }
4892 
4893 static int
load_binint(UnpicklerObject * self)4894 load_binint(UnpicklerObject *self)
4895 {
4896     char *s;
4897 
4898     if (_Unpickler_Read(self, &s, 4) < 0)
4899         return -1;
4900 
4901     return load_binintx(self, s, 4);
4902 }
4903 
4904 static int
load_binint1(UnpicklerObject * self)4905 load_binint1(UnpicklerObject *self)
4906 {
4907     char *s;
4908 
4909     if (_Unpickler_Read(self, &s, 1) < 0)
4910         return -1;
4911 
4912     return load_binintx(self, s, 1);
4913 }
4914 
4915 static int
load_binint2(UnpicklerObject * self)4916 load_binint2(UnpicklerObject *self)
4917 {
4918     char *s;
4919 
4920     if (_Unpickler_Read(self, &s, 2) < 0)
4921         return -1;
4922 
4923     return load_binintx(self, s, 2);
4924 }
4925 
4926 static int
load_long(UnpicklerObject * self)4927 load_long(UnpicklerObject *self)
4928 {
4929     PyObject *value;
4930     char *s = NULL;
4931     Py_ssize_t len;
4932 
4933     if ((len = _Unpickler_Readline(self, &s)) < 0)
4934         return -1;
4935     if (len < 2)
4936         return bad_readline();
4937 
4938     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4939        the 'L' before calling PyLong_FromString.  In order to maintain
4940        compatibility with Python 3.0.0, we don't actually *require*
4941        the 'L' to be present. */
4942     if (s[len-2] == 'L')
4943         s[len-2] = '\0';
4944     /* XXX: Should the base argument explicitly set to 10? */
4945     value = PyLong_FromString(s, NULL, 0);
4946     if (value == NULL)
4947         return -1;
4948 
4949     PDATA_PUSH(self->stack, value, -1);
4950     return 0;
4951 }
4952 
4953 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
4954  * data following.
4955  */
4956 static int
load_counted_long(UnpicklerObject * self,int size)4957 load_counted_long(UnpicklerObject *self, int size)
4958 {
4959     PyObject *value;
4960     char *nbytes;
4961     char *pdata;
4962 
4963     assert(size == 1 || size == 4);
4964     if (_Unpickler_Read(self, &nbytes, size) < 0)
4965         return -1;
4966 
4967     size = calc_binint(nbytes, size);
4968     if (size < 0) {
4969         PickleState *st = _Pickle_GetGlobalState();
4970         /* Corrupt or hostile pickle -- we never write one like this */
4971         PyErr_SetString(st->UnpicklingError,
4972                         "LONG pickle has negative byte count");
4973         return -1;
4974     }
4975 
4976     if (size == 0)
4977         value = PyLong_FromLong(0L);
4978     else {
4979         /* Read the raw little-endian bytes and convert. */
4980         if (_Unpickler_Read(self, &pdata, size) < 0)
4981             return -1;
4982         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4983                                       1 /* little endian */ , 1 /* signed */ );
4984     }
4985     if (value == NULL)
4986         return -1;
4987     PDATA_PUSH(self->stack, value, -1);
4988     return 0;
4989 }
4990 
4991 static int
load_float(UnpicklerObject * self)4992 load_float(UnpicklerObject *self)
4993 {
4994     PyObject *value;
4995     char *endptr, *s;
4996     Py_ssize_t len;
4997     double d;
4998 
4999     if ((len = _Unpickler_Readline(self, &s)) < 0)
5000         return -1;
5001     if (len < 2)
5002         return bad_readline();
5003 
5004     errno = 0;
5005     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5006     if (d == -1.0 && PyErr_Occurred())
5007         return -1;
5008     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5009         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5010         return -1;
5011     }
5012     value = PyFloat_FromDouble(d);
5013     if (value == NULL)
5014         return -1;
5015 
5016     PDATA_PUSH(self->stack, value, -1);
5017     return 0;
5018 }
5019 
5020 static int
load_binfloat(UnpicklerObject * self)5021 load_binfloat(UnpicklerObject *self)
5022 {
5023     PyObject *value;
5024     double x;
5025     char *s;
5026 
5027     if (_Unpickler_Read(self, &s, 8) < 0)
5028         return -1;
5029 
5030     x = _PyFloat_Unpack8((unsigned char *)s, 0);
5031     if (x == -1.0 && PyErr_Occurred())
5032         return -1;
5033 
5034     if ((value = PyFloat_FromDouble(x)) == NULL)
5035         return -1;
5036 
5037     PDATA_PUSH(self->stack, value, -1);
5038     return 0;
5039 }
5040 
5041 static int
load_string(UnpicklerObject * self)5042 load_string(UnpicklerObject *self)
5043 {
5044     PyObject *bytes;
5045     PyObject *obj;
5046     Py_ssize_t len;
5047     char *s, *p;
5048 
5049     if ((len = _Unpickler_Readline(self, &s)) < 0)
5050         return -1;
5051     /* Strip the newline */
5052     len--;
5053     /* Strip outermost quotes */
5054     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5055         p = s + 1;
5056         len -= 2;
5057     }
5058     else {
5059         PickleState *st = _Pickle_GetGlobalState();
5060         PyErr_SetString(st->UnpicklingError,
5061                         "the STRING opcode argument must be quoted");
5062         return -1;
5063     }
5064     assert(len >= 0);
5065 
5066     /* Use the PyBytes API to decode the string, since that is what is used
5067        to encode, and then coerce the result to Unicode. */
5068     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5069     if (bytes == NULL)
5070         return -1;
5071 
5072     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5073        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5074     if (strcmp(self->encoding, "bytes") == 0) {
5075         obj = bytes;
5076     }
5077     else {
5078         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5079         Py_DECREF(bytes);
5080         if (obj == NULL) {
5081             return -1;
5082         }
5083     }
5084 
5085     PDATA_PUSH(self->stack, obj, -1);
5086     return 0;
5087 }
5088 
5089 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5090 load_counted_binstring(UnpicklerObject *self, int nbytes)
5091 {
5092     PyObject *obj;
5093     Py_ssize_t size;
5094     char *s;
5095 
5096     if (_Unpickler_Read(self, &s, nbytes) < 0)
5097         return -1;
5098 
5099     size = calc_binsize(s, nbytes);
5100     if (size < 0) {
5101         PickleState *st = _Pickle_GetGlobalState();
5102         PyErr_Format(st->UnpicklingError,
5103                      "BINSTRING exceeds system's maximum size of %zd bytes",
5104                      PY_SSIZE_T_MAX);
5105         return -1;
5106     }
5107 
5108     if (_Unpickler_Read(self, &s, size) < 0)
5109         return -1;
5110 
5111     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5112        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5113     if (strcmp(self->encoding, "bytes") == 0) {
5114         obj = PyBytes_FromStringAndSize(s, size);
5115     }
5116     else {
5117         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5118     }
5119     if (obj == NULL) {
5120         return -1;
5121     }
5122 
5123     PDATA_PUSH(self->stack, obj, -1);
5124     return 0;
5125 }
5126 
5127 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5128 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5129 {
5130     PyObject *bytes;
5131     Py_ssize_t size;
5132     char *s;
5133 
5134     if (_Unpickler_Read(self, &s, nbytes) < 0)
5135         return -1;
5136 
5137     size = calc_binsize(s, nbytes);
5138     if (size < 0) {
5139         PyErr_Format(PyExc_OverflowError,
5140                      "BINBYTES exceeds system's maximum size of %zd bytes",
5141                      PY_SSIZE_T_MAX);
5142         return -1;
5143     }
5144 
5145     if (_Unpickler_Read(self, &s, size) < 0)
5146         return -1;
5147 
5148     bytes = PyBytes_FromStringAndSize(s, size);
5149     if (bytes == NULL)
5150         return -1;
5151 
5152     PDATA_PUSH(self->stack, bytes, -1);
5153     return 0;
5154 }
5155 
5156 static int
load_unicode(UnpicklerObject * self)5157 load_unicode(UnpicklerObject *self)
5158 {
5159     PyObject *str;
5160     Py_ssize_t len;
5161     char *s = NULL;
5162 
5163     if ((len = _Unpickler_Readline(self, &s)) < 0)
5164         return -1;
5165     if (len < 1)
5166         return bad_readline();
5167 
5168     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5169     if (str == NULL)
5170         return -1;
5171 
5172     PDATA_PUSH(self->stack, str, -1);
5173     return 0;
5174 }
5175 
5176 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5177 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5178 {
5179     PyObject *str;
5180     Py_ssize_t size;
5181     char *s;
5182 
5183     if (_Unpickler_Read(self, &s, nbytes) < 0)
5184         return -1;
5185 
5186     size = calc_binsize(s, nbytes);
5187     if (size < 0) {
5188         PyErr_Format(PyExc_OverflowError,
5189                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5190                      PY_SSIZE_T_MAX);
5191         return -1;
5192     }
5193 
5194     if (_Unpickler_Read(self, &s, size) < 0)
5195         return -1;
5196 
5197     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5198     if (str == NULL)
5199         return -1;
5200 
5201     PDATA_PUSH(self->stack, str, -1);
5202     return 0;
5203 }
5204 
5205 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5206 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5207 {
5208     PyObject *tuple;
5209 
5210     if (Py_SIZE(self->stack) < len)
5211         return Pdata_stack_underflow(self->stack);
5212 
5213     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5214     if (tuple == NULL)
5215         return -1;
5216     PDATA_PUSH(self->stack, tuple, -1);
5217     return 0;
5218 }
5219 
5220 static int
load_tuple(UnpicklerObject * self)5221 load_tuple(UnpicklerObject *self)
5222 {
5223     Py_ssize_t i;
5224 
5225     if ((i = marker(self)) < 0)
5226         return -1;
5227 
5228     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5229 }
5230 
5231 static int
load_empty_list(UnpicklerObject * self)5232 load_empty_list(UnpicklerObject *self)
5233 {
5234     PyObject *list;
5235 
5236     if ((list = PyList_New(0)) == NULL)
5237         return -1;
5238     PDATA_PUSH(self->stack, list, -1);
5239     return 0;
5240 }
5241 
5242 static int
load_empty_dict(UnpicklerObject * self)5243 load_empty_dict(UnpicklerObject *self)
5244 {
5245     PyObject *dict;
5246 
5247     if ((dict = PyDict_New()) == NULL)
5248         return -1;
5249     PDATA_PUSH(self->stack, dict, -1);
5250     return 0;
5251 }
5252 
5253 static int
load_empty_set(UnpicklerObject * self)5254 load_empty_set(UnpicklerObject *self)
5255 {
5256     PyObject *set;
5257 
5258     if ((set = PySet_New(NULL)) == NULL)
5259         return -1;
5260     PDATA_PUSH(self->stack, set, -1);
5261     return 0;
5262 }
5263 
5264 static int
load_list(UnpicklerObject * self)5265 load_list(UnpicklerObject *self)
5266 {
5267     PyObject *list;
5268     Py_ssize_t i;
5269 
5270     if ((i = marker(self)) < 0)
5271         return -1;
5272 
5273     list = Pdata_poplist(self->stack, i);
5274     if (list == NULL)
5275         return -1;
5276     PDATA_PUSH(self->stack, list, -1);
5277     return 0;
5278 }
5279 
5280 static int
load_dict(UnpicklerObject * self)5281 load_dict(UnpicklerObject *self)
5282 {
5283     PyObject *dict, *key, *value;
5284     Py_ssize_t i, j, k;
5285 
5286     if ((i = marker(self)) < 0)
5287         return -1;
5288     j = Py_SIZE(self->stack);
5289 
5290     if ((dict = PyDict_New()) == NULL)
5291         return -1;
5292 
5293     if ((j - i) % 2 != 0) {
5294         PickleState *st = _Pickle_GetGlobalState();
5295         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5296         Py_DECREF(dict);
5297         return -1;
5298     }
5299 
5300     for (k = i + 1; k < j; k += 2) {
5301         key = self->stack->data[k - 1];
5302         value = self->stack->data[k];
5303         if (PyDict_SetItem(dict, key, value) < 0) {
5304             Py_DECREF(dict);
5305             return -1;
5306         }
5307     }
5308     Pdata_clear(self->stack, i);
5309     PDATA_PUSH(self->stack, dict, -1);
5310     return 0;
5311 }
5312 
5313 static int
load_frozenset(UnpicklerObject * self)5314 load_frozenset(UnpicklerObject *self)
5315 {
5316     PyObject *items;
5317     PyObject *frozenset;
5318     Py_ssize_t i;
5319 
5320     if ((i = marker(self)) < 0)
5321         return -1;
5322 
5323     items = Pdata_poptuple(self->stack, i);
5324     if (items == NULL)
5325         return -1;
5326 
5327     frozenset = PyFrozenSet_New(items);
5328     Py_DECREF(items);
5329     if (frozenset == NULL)
5330         return -1;
5331 
5332     PDATA_PUSH(self->stack, frozenset, -1);
5333     return 0;
5334 }
5335 
5336 static PyObject *
instantiate(PyObject * cls,PyObject * args)5337 instantiate(PyObject *cls, PyObject *args)
5338 {
5339     /* Caller must assure args are a tuple.  Normally, args come from
5340        Pdata_poptuple which packs objects from the top of the stack
5341        into a newly created tuple. */
5342     assert(PyTuple_Check(args));
5343     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5344         _Py_IDENTIFIER(__getinitargs__);
5345         _Py_IDENTIFIER(__new__);
5346         PyObject *func;
5347         if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5348             return NULL;
5349         }
5350         if (func == NULL) {
5351             return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5352         }
5353         Py_DECREF(func);
5354     }
5355     return PyObject_CallObject(cls, args);
5356 }
5357 
5358 static int
load_obj(UnpicklerObject * self)5359 load_obj(UnpicklerObject *self)
5360 {
5361     PyObject *cls, *args, *obj = NULL;
5362     Py_ssize_t i;
5363 
5364     if ((i = marker(self)) < 0)
5365         return -1;
5366 
5367     if (Py_SIZE(self->stack) - i < 1)
5368         return Pdata_stack_underflow(self->stack);
5369 
5370     args = Pdata_poptuple(self->stack, i + 1);
5371     if (args == NULL)
5372         return -1;
5373 
5374     PDATA_POP(self->stack, cls);
5375     if (cls) {
5376         obj = instantiate(cls, args);
5377         Py_DECREF(cls);
5378     }
5379     Py_DECREF(args);
5380     if (obj == NULL)
5381         return -1;
5382 
5383     PDATA_PUSH(self->stack, obj, -1);
5384     return 0;
5385 }
5386 
5387 static int
load_inst(UnpicklerObject * self)5388 load_inst(UnpicklerObject *self)
5389 {
5390     PyObject *cls = NULL;
5391     PyObject *args = NULL;
5392     PyObject *obj = NULL;
5393     PyObject *module_name;
5394     PyObject *class_name;
5395     Py_ssize_t len;
5396     Py_ssize_t i;
5397     char *s;
5398 
5399     if ((i = marker(self)) < 0)
5400         return -1;
5401     if ((len = _Unpickler_Readline(self, &s)) < 0)
5402         return -1;
5403     if (len < 2)
5404         return bad_readline();
5405 
5406     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5407        identifiers are permitted in Python 3.0, since the INST opcode is only
5408        supported by older protocols on Python 2.x. */
5409     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5410     if (module_name == NULL)
5411         return -1;
5412 
5413     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5414         if (len < 2) {
5415             Py_DECREF(module_name);
5416             return bad_readline();
5417         }
5418         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5419         if (class_name != NULL) {
5420             cls = find_class(self, module_name, class_name);
5421             Py_DECREF(class_name);
5422         }
5423     }
5424     Py_DECREF(module_name);
5425 
5426     if (cls == NULL)
5427         return -1;
5428 
5429     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5430         obj = instantiate(cls, args);
5431         Py_DECREF(args);
5432     }
5433     Py_DECREF(cls);
5434 
5435     if (obj == NULL)
5436         return -1;
5437 
5438     PDATA_PUSH(self->stack, obj, -1);
5439     return 0;
5440 }
5441 
5442 static int
load_newobj(UnpicklerObject * self)5443 load_newobj(UnpicklerObject *self)
5444 {
5445     PyObject *args = NULL;
5446     PyObject *clsraw = NULL;
5447     PyTypeObject *cls;          /* clsraw cast to its true type */
5448     PyObject *obj;
5449     PickleState *st = _Pickle_GetGlobalState();
5450 
5451     /* Stack is ... cls argtuple, and we want to call
5452      * cls.__new__(cls, *argtuple).
5453      */
5454     PDATA_POP(self->stack, args);
5455     if (args == NULL)
5456         goto error;
5457     if (!PyTuple_Check(args)) {
5458         PyErr_SetString(st->UnpicklingError,
5459                         "NEWOBJ expected an arg " "tuple.");
5460         goto error;
5461     }
5462 
5463     PDATA_POP(self->stack, clsraw);
5464     cls = (PyTypeObject *)clsraw;
5465     if (cls == NULL)
5466         goto error;
5467     if (!PyType_Check(cls)) {
5468         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5469                         "isn't a type object");
5470         goto error;
5471     }
5472     if (cls->tp_new == NULL) {
5473         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5474                         "has NULL tp_new");
5475         goto error;
5476     }
5477 
5478     /* Call __new__. */
5479     obj = cls->tp_new(cls, args, NULL);
5480     if (obj == NULL)
5481         goto error;
5482 
5483     Py_DECREF(args);
5484     Py_DECREF(clsraw);
5485     PDATA_PUSH(self->stack, obj, -1);
5486     return 0;
5487 
5488   error:
5489     Py_XDECREF(args);
5490     Py_XDECREF(clsraw);
5491     return -1;
5492 }
5493 
5494 static int
load_newobj_ex(UnpicklerObject * self)5495 load_newobj_ex(UnpicklerObject *self)
5496 {
5497     PyObject *cls, *args, *kwargs;
5498     PyObject *obj;
5499     PickleState *st = _Pickle_GetGlobalState();
5500 
5501     PDATA_POP(self->stack, kwargs);
5502     if (kwargs == NULL) {
5503         return -1;
5504     }
5505     PDATA_POP(self->stack, args);
5506     if (args == NULL) {
5507         Py_DECREF(kwargs);
5508         return -1;
5509     }
5510     PDATA_POP(self->stack, cls);
5511     if (cls == NULL) {
5512         Py_DECREF(kwargs);
5513         Py_DECREF(args);
5514         return -1;
5515     }
5516 
5517     if (!PyType_Check(cls)) {
5518         PyErr_Format(st->UnpicklingError,
5519                      "NEWOBJ_EX class argument must be a type, not %.200s",
5520                      Py_TYPE(cls)->tp_name);
5521         goto error;
5522     }
5523 
5524     if (((PyTypeObject *)cls)->tp_new == NULL) {
5525         PyErr_SetString(st->UnpicklingError,
5526                         "NEWOBJ_EX class argument doesn't have __new__");
5527         goto error;
5528     }
5529     if (!PyTuple_Check(args)) {
5530         PyErr_Format(st->UnpicklingError,
5531                      "NEWOBJ_EX args argument must be a tuple, not %.200s",
5532                      Py_TYPE(args)->tp_name);
5533         goto error;
5534     }
5535     if (!PyDict_Check(kwargs)) {
5536         PyErr_Format(st->UnpicklingError,
5537                      "NEWOBJ_EX kwargs argument must be a dict, not %.200s",
5538                      Py_TYPE(kwargs)->tp_name);
5539         goto error;
5540     }
5541 
5542     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5543     Py_DECREF(kwargs);
5544     Py_DECREF(args);
5545     Py_DECREF(cls);
5546     if (obj == NULL) {
5547         return -1;
5548     }
5549     PDATA_PUSH(self->stack, obj, -1);
5550     return 0;
5551 
5552 error:
5553     Py_DECREF(kwargs);
5554     Py_DECREF(args);
5555     Py_DECREF(cls);
5556     return -1;
5557 }
5558 
5559 static int
load_global(UnpicklerObject * self)5560 load_global(UnpicklerObject *self)
5561 {
5562     PyObject *global = NULL;
5563     PyObject *module_name;
5564     PyObject *global_name;
5565     Py_ssize_t len;
5566     char *s;
5567 
5568     if ((len = _Unpickler_Readline(self, &s)) < 0)
5569         return -1;
5570     if (len < 2)
5571         return bad_readline();
5572     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5573     if (!module_name)
5574         return -1;
5575 
5576     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5577         if (len < 2) {
5578             Py_DECREF(module_name);
5579             return bad_readline();
5580         }
5581         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5582         if (global_name) {
5583             global = find_class(self, module_name, global_name);
5584             Py_DECREF(global_name);
5585         }
5586     }
5587     Py_DECREF(module_name);
5588 
5589     if (global == NULL)
5590         return -1;
5591     PDATA_PUSH(self->stack, global, -1);
5592     return 0;
5593 }
5594 
5595 static int
load_stack_global(UnpicklerObject * self)5596 load_stack_global(UnpicklerObject *self)
5597 {
5598     PyObject *global;
5599     PyObject *module_name;
5600     PyObject *global_name;
5601 
5602     PDATA_POP(self->stack, global_name);
5603     PDATA_POP(self->stack, module_name);
5604     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5605         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5606         PickleState *st = _Pickle_GetGlobalState();
5607         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
5608         Py_XDECREF(global_name);
5609         Py_XDECREF(module_name);
5610         return -1;
5611     }
5612     global = find_class(self, module_name, global_name);
5613     Py_DECREF(global_name);
5614     Py_DECREF(module_name);
5615     if (global == NULL)
5616         return -1;
5617     PDATA_PUSH(self->stack, global, -1);
5618     return 0;
5619 }
5620 
5621 static int
load_persid(UnpicklerObject * self)5622 load_persid(UnpicklerObject *self)
5623 {
5624     PyObject *pid, *obj;
5625     Py_ssize_t len;
5626     char *s;
5627 
5628     if (self->pers_func) {
5629         if ((len = _Unpickler_Readline(self, &s)) < 0)
5630             return -1;
5631         if (len < 1)
5632             return bad_readline();
5633 
5634         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
5635         if (pid == NULL) {
5636             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
5637                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
5638                                 "persistent IDs in protocol 0 must be "
5639                                 "ASCII strings");
5640             }
5641             return -1;
5642         }
5643 
5644         obj = call_method(self->pers_func, self->pers_func_self, pid);
5645         Py_DECREF(pid);
5646         if (obj == NULL)
5647             return -1;
5648 
5649         PDATA_PUSH(self->stack, obj, -1);
5650         return 0;
5651     }
5652     else {
5653         PickleState *st = _Pickle_GetGlobalState();
5654         PyErr_SetString(st->UnpicklingError,
5655                         "A load persistent id instruction was encountered,\n"
5656                         "but no persistent_load function was specified.");
5657         return -1;
5658     }
5659 }
5660 
5661 static int
load_binpersid(UnpicklerObject * self)5662 load_binpersid(UnpicklerObject *self)
5663 {
5664     PyObject *pid, *obj;
5665 
5666     if (self->pers_func) {
5667         PDATA_POP(self->stack, pid);
5668         if (pid == NULL)
5669             return -1;
5670 
5671         obj = call_method(self->pers_func, self->pers_func_self, pid);
5672         Py_DECREF(pid);
5673         if (obj == NULL)
5674             return -1;
5675 
5676         PDATA_PUSH(self->stack, obj, -1);
5677         return 0;
5678     }
5679     else {
5680         PickleState *st = _Pickle_GetGlobalState();
5681         PyErr_SetString(st->UnpicklingError,
5682                         "A load persistent id instruction was encountered,\n"
5683                         "but no persistent_load function was specified.");
5684         return -1;
5685     }
5686 }
5687 
5688 static int
load_pop(UnpicklerObject * self)5689 load_pop(UnpicklerObject *self)
5690 {
5691     Py_ssize_t len = Py_SIZE(self->stack);
5692 
5693     /* Note that we split the (pickle.py) stack into two stacks,
5694      * an object stack and a mark stack. We have to be clever and
5695      * pop the right one. We do this by looking at the top of the
5696      * mark stack first, and only signalling a stack underflow if
5697      * the object stack is empty and the mark stack doesn't match
5698      * our expectations.
5699      */
5700     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
5701         self->num_marks--;
5702         self->stack->mark_set = self->num_marks != 0;
5703         self->stack->fence = self->num_marks ?
5704                 self->marks[self->num_marks - 1] : 0;
5705     } else if (len <= self->stack->fence)
5706         return Pdata_stack_underflow(self->stack);
5707     else {
5708         len--;
5709         Py_DECREF(self->stack->data[len]);
5710         Py_SIZE(self->stack) = len;
5711     }
5712     return 0;
5713 }
5714 
5715 static int
load_pop_mark(UnpicklerObject * self)5716 load_pop_mark(UnpicklerObject *self)
5717 {
5718     Py_ssize_t i;
5719 
5720     if ((i = marker(self)) < 0)
5721         return -1;
5722 
5723     Pdata_clear(self->stack, i);
5724 
5725     return 0;
5726 }
5727 
5728 static int
load_dup(UnpicklerObject * self)5729 load_dup(UnpicklerObject *self)
5730 {
5731     PyObject *last;
5732     Py_ssize_t len = Py_SIZE(self->stack);
5733 
5734     if (len <= self->stack->fence)
5735         return Pdata_stack_underflow(self->stack);
5736     last = self->stack->data[len - 1];
5737     PDATA_APPEND(self->stack, last, -1);
5738     return 0;
5739 }
5740 
5741 static int
load_get(UnpicklerObject * self)5742 load_get(UnpicklerObject *self)
5743 {
5744     PyObject *key, *value;
5745     Py_ssize_t idx;
5746     Py_ssize_t len;
5747     char *s;
5748 
5749     if ((len = _Unpickler_Readline(self, &s)) < 0)
5750         return -1;
5751     if (len < 2)
5752         return bad_readline();
5753 
5754     key = PyLong_FromString(s, NULL, 10);
5755     if (key == NULL)
5756         return -1;
5757     idx = PyLong_AsSsize_t(key);
5758     if (idx == -1 && PyErr_Occurred()) {
5759         Py_DECREF(key);
5760         return -1;
5761     }
5762 
5763     value = _Unpickler_MemoGet(self, idx);
5764     if (value == NULL) {
5765         if (!PyErr_Occurred())
5766             PyErr_SetObject(PyExc_KeyError, key);
5767         Py_DECREF(key);
5768         return -1;
5769     }
5770     Py_DECREF(key);
5771 
5772     PDATA_APPEND(self->stack, value, -1);
5773     return 0;
5774 }
5775 
5776 static int
load_binget(UnpicklerObject * self)5777 load_binget(UnpicklerObject *self)
5778 {
5779     PyObject *value;
5780     Py_ssize_t idx;
5781     char *s;
5782 
5783     if (_Unpickler_Read(self, &s, 1) < 0)
5784         return -1;
5785 
5786     idx = Py_CHARMASK(s[0]);
5787 
5788     value = _Unpickler_MemoGet(self, idx);
5789     if (value == NULL) {
5790         PyObject *key = PyLong_FromSsize_t(idx);
5791         if (key != NULL) {
5792             PyErr_SetObject(PyExc_KeyError, key);
5793             Py_DECREF(key);
5794         }
5795         return -1;
5796     }
5797 
5798     PDATA_APPEND(self->stack, value, -1);
5799     return 0;
5800 }
5801 
5802 static int
load_long_binget(UnpicklerObject * self)5803 load_long_binget(UnpicklerObject *self)
5804 {
5805     PyObject *value;
5806     Py_ssize_t idx;
5807     char *s;
5808 
5809     if (_Unpickler_Read(self, &s, 4) < 0)
5810         return -1;
5811 
5812     idx = calc_binsize(s, 4);
5813 
5814     value = _Unpickler_MemoGet(self, idx);
5815     if (value == NULL) {
5816         PyObject *key = PyLong_FromSsize_t(idx);
5817         if (key != NULL) {
5818             PyErr_SetObject(PyExc_KeyError, key);
5819             Py_DECREF(key);
5820         }
5821         return -1;
5822     }
5823 
5824     PDATA_APPEND(self->stack, value, -1);
5825     return 0;
5826 }
5827 
5828 /* Push an object from the extension registry (EXT[124]).  nbytes is
5829  * the number of bytes following the opcode, holding the index (code) value.
5830  */
5831 static int
load_extension(UnpicklerObject * self,int nbytes)5832 load_extension(UnpicklerObject *self, int nbytes)
5833 {
5834     char *codebytes;            /* the nbytes bytes after the opcode */
5835     long code;                  /* calc_binint returns long */
5836     PyObject *py_code;          /* code as a Python int */
5837     PyObject *obj;              /* the object to push */
5838     PyObject *pair;             /* (module_name, class_name) */
5839     PyObject *module_name, *class_name;
5840     PickleState *st = _Pickle_GetGlobalState();
5841 
5842     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
5843     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
5844         return -1;
5845     code = calc_binint(codebytes, nbytes);
5846     if (code <= 0) {            /* note that 0 is forbidden */
5847         /* Corrupt or hostile pickle. */
5848         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
5849         return -1;
5850     }
5851 
5852     /* Look for the code in the cache. */
5853     py_code = PyLong_FromLong(code);
5854     if (py_code == NULL)
5855         return -1;
5856     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
5857     if (obj != NULL) {
5858         /* Bingo. */
5859         Py_DECREF(py_code);
5860         PDATA_APPEND(self->stack, obj, -1);
5861         return 0;
5862     }
5863     if (PyErr_Occurred()) {
5864         Py_DECREF(py_code);
5865         return -1;
5866     }
5867 
5868     /* Look up the (module_name, class_name) pair. */
5869     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
5870     if (pair == NULL) {
5871         Py_DECREF(py_code);
5872         if (!PyErr_Occurred()) {
5873             PyErr_Format(PyExc_ValueError, "unregistered extension "
5874                          "code %ld", code);
5875         }
5876         return -1;
5877     }
5878     /* Since the extension registry is manipulable via Python code,
5879      * confirm that pair is really a 2-tuple of strings.
5880      */
5881     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5882         !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5883         !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5884         Py_DECREF(py_code);
5885         PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5886                      "isn't a 2-tuple of strings", code);
5887         return -1;
5888     }
5889     /* Load the object. */
5890     obj = find_class(self, module_name, class_name);
5891     if (obj == NULL) {
5892         Py_DECREF(py_code);
5893         return -1;
5894     }
5895     /* Cache code -> obj. */
5896     code = PyDict_SetItem(st->extension_cache, py_code, obj);
5897     Py_DECREF(py_code);
5898     if (code < 0) {
5899         Py_DECREF(obj);
5900         return -1;
5901     }
5902     PDATA_PUSH(self->stack, obj, -1);
5903     return 0;
5904 }
5905 
5906 static int
load_put(UnpicklerObject * self)5907 load_put(UnpicklerObject *self)
5908 {
5909     PyObject *key, *value;
5910     Py_ssize_t idx;
5911     Py_ssize_t len;
5912     char *s = NULL;
5913 
5914     if ((len = _Unpickler_Readline(self, &s)) < 0)
5915         return -1;
5916     if (len < 2)
5917         return bad_readline();
5918     if (Py_SIZE(self->stack) <= self->stack->fence)
5919         return Pdata_stack_underflow(self->stack);
5920     value = self->stack->data[Py_SIZE(self->stack) - 1];
5921 
5922     key = PyLong_FromString(s, NULL, 10);
5923     if (key == NULL)
5924         return -1;
5925     idx = PyLong_AsSsize_t(key);
5926     Py_DECREF(key);
5927     if (idx < 0) {
5928         if (!PyErr_Occurred())
5929             PyErr_SetString(PyExc_ValueError,
5930                             "negative PUT argument");
5931         return -1;
5932     }
5933 
5934     return _Unpickler_MemoPut(self, idx, value);
5935 }
5936 
5937 static int
load_binput(UnpicklerObject * self)5938 load_binput(UnpicklerObject *self)
5939 {
5940     PyObject *value;
5941     Py_ssize_t idx;
5942     char *s;
5943 
5944     if (_Unpickler_Read(self, &s, 1) < 0)
5945         return -1;
5946 
5947     if (Py_SIZE(self->stack) <= self->stack->fence)
5948         return Pdata_stack_underflow(self->stack);
5949     value = self->stack->data[Py_SIZE(self->stack) - 1];
5950 
5951     idx = Py_CHARMASK(s[0]);
5952 
5953     return _Unpickler_MemoPut(self, idx, value);
5954 }
5955 
5956 static int
load_long_binput(UnpicklerObject * self)5957 load_long_binput(UnpicklerObject *self)
5958 {
5959     PyObject *value;
5960     Py_ssize_t idx;
5961     char *s;
5962 
5963     if (_Unpickler_Read(self, &s, 4) < 0)
5964         return -1;
5965 
5966     if (Py_SIZE(self->stack) <= self->stack->fence)
5967         return Pdata_stack_underflow(self->stack);
5968     value = self->stack->data[Py_SIZE(self->stack) - 1];
5969 
5970     idx = calc_binsize(s, 4);
5971     if (idx < 0) {
5972         PyErr_SetString(PyExc_ValueError,
5973                         "negative LONG_BINPUT argument");
5974         return -1;
5975     }
5976 
5977     return _Unpickler_MemoPut(self, idx, value);
5978 }
5979 
5980 static int
load_memoize(UnpicklerObject * self)5981 load_memoize(UnpicklerObject *self)
5982 {
5983     PyObject *value;
5984 
5985     if (Py_SIZE(self->stack) <= self->stack->fence)
5986         return Pdata_stack_underflow(self->stack);
5987     value = self->stack->data[Py_SIZE(self->stack) - 1];
5988 
5989     return _Unpickler_MemoPut(self, self->memo_len, value);
5990 }
5991 
5992 static int
do_append(UnpicklerObject * self,Py_ssize_t x)5993 do_append(UnpicklerObject *self, Py_ssize_t x)
5994 {
5995     PyObject *value;
5996     PyObject *slice;
5997     PyObject *list;
5998     PyObject *result;
5999     Py_ssize_t len, i;
6000 
6001     len = Py_SIZE(self->stack);
6002     if (x > len || x <= self->stack->fence)
6003         return Pdata_stack_underflow(self->stack);
6004     if (len == x)  /* nothing to do */
6005         return 0;
6006 
6007     list = self->stack->data[x - 1];
6008 
6009     if (PyList_CheckExact(list)) {
6010         Py_ssize_t list_len;
6011         int ret;
6012 
6013         slice = Pdata_poplist(self->stack, x);
6014         if (!slice)
6015             return -1;
6016         list_len = PyList_GET_SIZE(list);
6017         ret = PyList_SetSlice(list, list_len, list_len, slice);
6018         Py_DECREF(slice);
6019         return ret;
6020     }
6021     else {
6022         PyObject *extend_func;
6023         _Py_IDENTIFIER(extend);
6024 
6025         extend_func = _PyObject_GetAttrId(list, &PyId_extend);
6026         if (extend_func != NULL) {
6027             slice = Pdata_poplist(self->stack, x);
6028             if (!slice) {
6029                 Py_DECREF(extend_func);
6030                 return -1;
6031             }
6032             result = _Pickle_FastCall(extend_func, slice);
6033             Py_DECREF(extend_func);
6034             if (result == NULL)
6035                 return -1;
6036             Py_DECREF(result);
6037         }
6038         else {
6039             PyObject *append_func;
6040             _Py_IDENTIFIER(append);
6041 
6042             /* Even if the PEP 307 requires extend() and append() methods,
6043                fall back on append() if the object has no extend() method
6044                for backward compatibility. */
6045             PyErr_Clear();
6046             append_func = _PyObject_GetAttrId(list, &PyId_append);
6047             if (append_func == NULL)
6048                 return -1;
6049             for (i = x; i < len; i++) {
6050                 value = self->stack->data[i];
6051                 result = _Pickle_FastCall(append_func, value);
6052                 if (result == NULL) {
6053                     Pdata_clear(self->stack, i + 1);
6054                     Py_SIZE(self->stack) = x;
6055                     Py_DECREF(append_func);
6056                     return -1;
6057                 }
6058                 Py_DECREF(result);
6059             }
6060             Py_SIZE(self->stack) = x;
6061             Py_DECREF(append_func);
6062         }
6063     }
6064 
6065     return 0;
6066 }
6067 
6068 static int
load_append(UnpicklerObject * self)6069 load_append(UnpicklerObject *self)
6070 {
6071     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6072         return Pdata_stack_underflow(self->stack);
6073     return do_append(self, Py_SIZE(self->stack) - 1);
6074 }
6075 
6076 static int
load_appends(UnpicklerObject * self)6077 load_appends(UnpicklerObject *self)
6078 {
6079     Py_ssize_t i = marker(self);
6080     if (i < 0)
6081         return -1;
6082     return do_append(self, i);
6083 }
6084 
6085 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6086 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6087 {
6088     PyObject *value, *key;
6089     PyObject *dict;
6090     Py_ssize_t len, i;
6091     int status = 0;
6092 
6093     len = Py_SIZE(self->stack);
6094     if (x > len || x <= self->stack->fence)
6095         return Pdata_stack_underflow(self->stack);
6096     if (len == x)  /* nothing to do */
6097         return 0;
6098     if ((len - x) % 2 != 0) {
6099         PickleState *st = _Pickle_GetGlobalState();
6100         /* Currupt or hostile pickle -- we never write one like this. */
6101         PyErr_SetString(st->UnpicklingError,
6102                         "odd number of items for SETITEMS");
6103         return -1;
6104     }
6105 
6106     /* Here, dict does not actually need to be a PyDict; it could be anything
6107        that supports the __setitem__ attribute. */
6108     dict = self->stack->data[x - 1];
6109 
6110     for (i = x + 1; i < len; i += 2) {
6111         key = self->stack->data[i - 1];
6112         value = self->stack->data[i];
6113         if (PyObject_SetItem(dict, key, value) < 0) {
6114             status = -1;
6115             break;
6116         }
6117     }
6118 
6119     Pdata_clear(self->stack, x);
6120     return status;
6121 }
6122 
6123 static int
load_setitem(UnpicklerObject * self)6124 load_setitem(UnpicklerObject *self)
6125 {
6126     return do_setitems(self, Py_SIZE(self->stack) - 2);
6127 }
6128 
6129 static int
load_setitems(UnpicklerObject * self)6130 load_setitems(UnpicklerObject *self)
6131 {
6132     Py_ssize_t i = marker(self);
6133     if (i < 0)
6134         return -1;
6135     return do_setitems(self, i);
6136 }
6137 
6138 static int
load_additems(UnpicklerObject * self)6139 load_additems(UnpicklerObject *self)
6140 {
6141     PyObject *set;
6142     Py_ssize_t mark, len, i;
6143 
6144     mark =  marker(self);
6145     if (mark < 0)
6146         return -1;
6147     len = Py_SIZE(self->stack);
6148     if (mark > len || mark <= self->stack->fence)
6149         return Pdata_stack_underflow(self->stack);
6150     if (len == mark)  /* nothing to do */
6151         return 0;
6152 
6153     set = self->stack->data[mark - 1];
6154 
6155     if (PySet_Check(set)) {
6156         PyObject *items;
6157         int status;
6158 
6159         items = Pdata_poptuple(self->stack, mark);
6160         if (items == NULL)
6161             return -1;
6162 
6163         status = _PySet_Update(set, items);
6164         Py_DECREF(items);
6165         return status;
6166     }
6167     else {
6168         PyObject *add_func;
6169         _Py_IDENTIFIER(add);
6170 
6171         add_func = _PyObject_GetAttrId(set, &PyId_add);
6172         if (add_func == NULL)
6173             return -1;
6174         for (i = mark; i < len; i++) {
6175             PyObject *result;
6176             PyObject *item;
6177 
6178             item = self->stack->data[i];
6179             result = _Pickle_FastCall(add_func, item);
6180             if (result == NULL) {
6181                 Pdata_clear(self->stack, i + 1);
6182                 Py_SIZE(self->stack) = mark;
6183                 return -1;
6184             }
6185             Py_DECREF(result);
6186         }
6187         Py_SIZE(self->stack) = mark;
6188     }
6189 
6190     return 0;
6191 }
6192 
6193 static int
load_build(UnpicklerObject * self)6194 load_build(UnpicklerObject *self)
6195 {
6196     PyObject *state, *inst, *slotstate;
6197     PyObject *setstate;
6198     int status = 0;
6199     _Py_IDENTIFIER(__setstate__);
6200 
6201     /* Stack is ... instance, state.  We want to leave instance at
6202      * the stack top, possibly mutated via instance.__setstate__(state).
6203      */
6204     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6205         return Pdata_stack_underflow(self->stack);
6206 
6207     PDATA_POP(self->stack, state);
6208     if (state == NULL)
6209         return -1;
6210 
6211     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6212 
6213     if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6214         Py_DECREF(state);
6215         return -1;
6216     }
6217     if (setstate != NULL) {
6218         PyObject *result;
6219 
6220         /* The explicit __setstate__ is responsible for everything. */
6221         result = _Pickle_FastCall(setstate, state);
6222         Py_DECREF(setstate);
6223         if (result == NULL)
6224             return -1;
6225         Py_DECREF(result);
6226         return 0;
6227     }
6228 
6229     /* A default __setstate__.  First see whether state embeds a
6230      * slot state dict too (a proto 2 addition).
6231      */
6232     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6233         PyObject *tmp = state;
6234 
6235         state = PyTuple_GET_ITEM(tmp, 0);
6236         slotstate = PyTuple_GET_ITEM(tmp, 1);
6237         Py_INCREF(state);
6238         Py_INCREF(slotstate);
6239         Py_DECREF(tmp);
6240     }
6241     else
6242         slotstate = NULL;
6243 
6244     /* Set inst.__dict__ from the state dict (if any). */
6245     if (state != Py_None) {
6246         PyObject *dict;
6247         PyObject *d_key, *d_value;
6248         Py_ssize_t i;
6249         _Py_IDENTIFIER(__dict__);
6250 
6251         if (!PyDict_Check(state)) {
6252             PickleState *st = _Pickle_GetGlobalState();
6253             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6254             goto error;
6255         }
6256         dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6257         if (dict == NULL)
6258             goto error;
6259 
6260         i = 0;
6261         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6262             /* normally the keys for instance attributes are
6263                interned.  we should try to do that here. */
6264             Py_INCREF(d_key);
6265             if (PyUnicode_CheckExact(d_key))
6266                 PyUnicode_InternInPlace(&d_key);
6267             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6268                 Py_DECREF(d_key);
6269                 goto error;
6270             }
6271             Py_DECREF(d_key);
6272         }
6273         Py_DECREF(dict);
6274     }
6275 
6276     /* Also set instance attributes from the slotstate dict (if any). */
6277     if (slotstate != NULL) {
6278         PyObject *d_key, *d_value;
6279         Py_ssize_t i;
6280 
6281         if (!PyDict_Check(slotstate)) {
6282             PickleState *st = _Pickle_GetGlobalState();
6283             PyErr_SetString(st->UnpicklingError,
6284                             "slot state is not a dictionary");
6285             goto error;
6286         }
6287         i = 0;
6288         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6289             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6290                 goto error;
6291         }
6292     }
6293 
6294     if (0) {
6295   error:
6296         status = -1;
6297     }
6298 
6299     Py_DECREF(state);
6300     Py_XDECREF(slotstate);
6301     return status;
6302 }
6303 
6304 static int
load_mark(UnpicklerObject * self)6305 load_mark(UnpicklerObject *self)
6306 {
6307 
6308     /* Note that we split the (pickle.py) stack into two stacks, an
6309      * object stack and a mark stack. Here we push a mark onto the
6310      * mark stack.
6311      */
6312 
6313     if ((self->num_marks + 1) >= self->marks_size) {
6314         size_t alloc;
6315 
6316         /* Use the size_t type to check for overflow. */
6317         alloc = ((size_t)self->num_marks << 1) + 20;
6318         if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
6319             alloc <= ((size_t)self->num_marks + 1)) {
6320             PyErr_NoMemory();
6321             return -1;
6322         }
6323 
6324         Py_ssize_t *marks_old = self->marks;
6325         PyMem_RESIZE(self->marks, Py_ssize_t, alloc);
6326         if (self->marks == NULL) {
6327             PyMem_FREE(marks_old);
6328             self->marks_size = 0;
6329             PyErr_NoMemory();
6330             return -1;
6331         }
6332         self->marks_size = (Py_ssize_t)alloc;
6333     }
6334 
6335     self->stack->mark_set = 1;
6336     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6337 
6338     return 0;
6339 }
6340 
6341 static int
load_reduce(UnpicklerObject * self)6342 load_reduce(UnpicklerObject *self)
6343 {
6344     PyObject *callable = NULL;
6345     PyObject *argtup = NULL;
6346     PyObject *obj = NULL;
6347 
6348     PDATA_POP(self->stack, argtup);
6349     if (argtup == NULL)
6350         return -1;
6351     PDATA_POP(self->stack, callable);
6352     if (callable) {
6353         obj = PyObject_CallObject(callable, argtup);
6354         Py_DECREF(callable);
6355     }
6356     Py_DECREF(argtup);
6357 
6358     if (obj == NULL)
6359         return -1;
6360 
6361     PDATA_PUSH(self->stack, obj, -1);
6362     return 0;
6363 }
6364 
6365 /* Just raises an error if we don't know the protocol specified.  PROTO
6366  * is the first opcode for protocols >= 2.
6367  */
6368 static int
load_proto(UnpicklerObject * self)6369 load_proto(UnpicklerObject *self)
6370 {
6371     char *s;
6372     int i;
6373 
6374     if (_Unpickler_Read(self, &s, 1) < 0)
6375         return -1;
6376 
6377     i = (unsigned char)s[0];
6378     if (i <= HIGHEST_PROTOCOL) {
6379         self->proto = i;
6380         return 0;
6381     }
6382 
6383     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6384     return -1;
6385 }
6386 
6387 static int
load_frame(UnpicklerObject * self)6388 load_frame(UnpicklerObject *self)
6389 {
6390     char *s;
6391     Py_ssize_t frame_len;
6392 
6393     if (_Unpickler_Read(self, &s, 8) < 0)
6394         return -1;
6395 
6396     frame_len = calc_binsize(s, 8);
6397     if (frame_len < 0) {
6398         PyErr_Format(PyExc_OverflowError,
6399                      "FRAME length exceeds system's maximum of %zd bytes",
6400                      PY_SSIZE_T_MAX);
6401         return -1;
6402     }
6403 
6404     if (_Unpickler_Read(self, &s, frame_len) < 0)
6405         return -1;
6406 
6407     /* Rewind to start of frame */
6408     self->next_read_idx -= frame_len;
6409     return 0;
6410 }
6411 
/* Run the unpickling dispatch loop: read one opcode byte at a time and hand
   it to the matching load_*() helper until a STOP opcode is seen.

   On success the result is taken from self->stack with PDATA_POP and
   returned.  NULL is returned when a read fails, when an opcode handler
   reports failure, when the opcode is unknown, or when
   _Unpickler_SkipConsumed() fails. */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    /* Reset the per-load state (mark count, stack mark/fence, protocol) and
       empty the stack if anything is still on it. */
    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       Each one expands to a complete `case`: when the handler returns < 0
       the `break` leaves the switch (and the unconditional `break` after the
       switch then leaves the loop); otherwise `continue` goes on to read the
       next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        /* Fetch the next opcode byte.  If the read failed with
           UnpicklingError, report EOFError instead; any other exception is
           left untouched. */
        if (_Unpickler_Read(self, &s, 1) < 0) {
            PickleState *st = _Pickle_GetGlobalState();
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(NEWOBJ_EX, load_newobj_ex)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* STOP ends the pickle: leave the switch, then the loop. */
        case STOP:
            break;

        /* Unknown opcode.  Printable ASCII bytes other than the single quote
           and the backslash are shown literally in the message; every other
           byte is shown as a two-digit hex escape. */
        default:
            {
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        break;                  /* and we are done! */
    }

    /* The loop is left both on STOP and when a handler failed; a pending
       exception is what tells the two cases apart. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    PDATA_POP(self->stack, value);
    return value;
}
6539 
6540 /*[clinic input]
6541 
6542 _pickle.Unpickler.load
6543 
6544 Load a pickle.
6545 
6546 Read a pickled object representation from the open file object given
6547 in the constructor, and return the reconstituted object hierarchy
6548 specified therein.
6549 [clinic start generated code]*/
6550 
6551 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6552 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6553 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6554 {
6555     UnpicklerObject *unpickler = (UnpicklerObject*)self;
6556 
6557     /* Check whether the Unpickler was initialized correctly. This prevents
6558        segfaulting if a subclass overridden __init__ with a function that does
6559        not call Unpickler.__init__(). Here, we simply ensure that self->read
6560        is not NULL. */
6561     if (unpickler->read == NULL) {
6562         PickleState *st = _Pickle_GetGlobalState();
6563         PyErr_Format(st->UnpicklingError,
6564                      "Unpickler.__init__() was not called by %s.__init__()",
6565                      Py_TYPE(unpickler)->tp_name);
6566         return NULL;
6567     }
6568 
6569     return load(unpickler);
6570 }
6571 
6572 /* The name of find_class() is misleading. In newer pickle protocols, this
6573    function is used for loading any global (i.e., functions), not just
6574    classes. The name is kept only for backward compatibility. */
6575 
6576 /*[clinic input]
6577 
6578 _pickle.Unpickler.find_class
6579 
6580   module_name: object
6581   global_name: object
6582   /
6583 
6584 Return an object from a specified module.
6585 
6586 If necessary, the module will be imported. Subclasses may override
6587 this method (e.g. to restrict unpickling of arbitrary classes and
6588 functions).
6589 
6590 This method is called whenever a class or a function object is
6591 needed.  Both arguments passed are str objects.
6592 [clinic start generated code]*/
6593 
6594 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)6595 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
6596                                   PyObject *module_name,
6597                                   PyObject *global_name)
6598 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
6599 {
6600     PyObject *global;
6601     PyObject *module;
6602 
6603     /* Try to map the old names used in Python 2.x to the new ones used in
6604        Python 3.x.  We do this only with old pickle protocols and when the
6605        user has not disabled the feature. */
6606     if (self->proto < 3 && self->fix_imports) {
6607         PyObject *key;
6608         PyObject *item;
6609         PickleState *st = _Pickle_GetGlobalState();
6610 
6611         /* Check if the global (i.e., a function or a class) was renamed
6612            or moved to another module. */
6613         key = PyTuple_Pack(2, module_name, global_name);
6614         if (key == NULL)
6615             return NULL;
6616         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
6617         Py_DECREF(key);
6618         if (item) {
6619             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6620                 PyErr_Format(PyExc_RuntimeError,
6621                              "_compat_pickle.NAME_MAPPING values should be "
6622                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6623                 return NULL;
6624             }
6625             module_name = PyTuple_GET_ITEM(item, 0);
6626             global_name = PyTuple_GET_ITEM(item, 1);
6627             if (!PyUnicode_Check(module_name) ||
6628                 !PyUnicode_Check(global_name)) {
6629                 PyErr_Format(PyExc_RuntimeError,
6630                              "_compat_pickle.NAME_MAPPING values should be "
6631                              "pairs of str, not (%.200s, %.200s)",
6632                              Py_TYPE(module_name)->tp_name,
6633                              Py_TYPE(global_name)->tp_name);
6634                 return NULL;
6635             }
6636         }
6637         else if (PyErr_Occurred()) {
6638             return NULL;
6639         }
6640         else {
6641             /* Check if the module was renamed. */
6642             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
6643             if (item) {
6644                 if (!PyUnicode_Check(item)) {
6645                     PyErr_Format(PyExc_RuntimeError,
6646                                 "_compat_pickle.IMPORT_MAPPING values should be "
6647                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
6648                     return NULL;
6649                 }
6650                 module_name = item;
6651             }
6652             else if (PyErr_Occurred()) {
6653                 return NULL;
6654             }
6655         }
6656     }
6657 
6658     /*
6659      * we don't use PyImport_GetModule here, because it can return partially-
6660      * initialised modules, which then cause the getattribute to fail.
6661      */
6662     module = PyImport_Import(module_name);
6663     if (module == NULL) {
6664         return NULL;
6665     }
6666     global = getattribute(module, global_name, self->proto >= 4);
6667     Py_DECREF(module);
6668     return global;
6669 }
6670 
6671 /*[clinic input]
6672 
6673 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
6674 
6675 Returns size in memory, in bytes.
6676 [clinic start generated code]*/
6677 
6678 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)6679 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
6680 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
6681 {
6682     Py_ssize_t res;
6683 
6684     res = _PyObject_SIZE(Py_TYPE(self));
6685     if (self->memo != NULL)
6686         res += self->memo_size * sizeof(PyObject *);
6687     if (self->marks != NULL)
6688         res += self->marks_size * sizeof(Py_ssize_t);
6689     if (self->input_line != NULL)
6690         res += strlen(self->input_line) + 1;
6691     if (self->encoding != NULL)
6692         res += strlen(self->encoding) + 1;
6693     if (self->errors != NULL)
6694         res += strlen(self->errors) + 1;
6695     return res;
6696 }
6697 
/* Method table of the Unpickler type (installed as tp_methods of
   Unpickler_Type).  Each entry is a *_METHODDEF macro; the table ends with
   an all-NULL sentinel. */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
6704 
/* tp_dealloc of Unpickler: release everything the object owns and free it. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking before the members are torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    /* Only release the buffer view when one is actually held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Release the memo, then the PyMem-allocated arrays and strings. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
6727 
/* tp_traverse of Unpickler: visit the object references held in the
   readline, read, peek, stack and pers_func members.  Only these five
   members are visited here. */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    return 0;
}
6738 
/* tp_clear of Unpickler, also called from __init__() when the object is
   initialised a second time.  Drops every owned reference and buffer and
   resets the freed pointers to NULL, so the object stays in a consistent,
   reusable state.  Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    /* Only release the buffer view when one is actually held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Free each PyMem allocation and NULL its pointer right away so a later
       clear or dealloc does not free it twice. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    PyMem_Free(self->encoding);
    self->encoding = NULL;
    PyMem_Free(self->errors);
    self->errors = NULL;

    return 0;
}
6764 
6765 /*[clinic input]
6766 
6767 _pickle.Unpickler.__init__
6768 
6769   file: object
6770   *
6771   fix_imports: bool = True
6772   encoding: str = 'ASCII'
6773   errors: str = 'strict'
6774 
6775 This takes a binary file for reading a pickle data stream.
6776 
6777 The protocol version of the pickle is detected automatically, so no
6778 protocol argument is needed.  Bytes past the pickled object's
6779 representation are ignored.
6780 
6781 The argument *file* must have two methods, a read() method that takes
6782 an integer argument, and a readline() method that requires no
6783 arguments.  Both methods should return bytes.  Thus *file* can be a
6784 binary file object opened for reading, an io.BytesIO object, or any
6785 other custom object that meets this interface.
6786 
6787 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
6788 which are used to control compatibility support for pickle stream
6789 generated by Python 2.  If *fix_imports* is True, pickle will try to
6790 map the old Python 2 names to the new names used in Python 3.  The
6791 *encoding* and *errors* tell pickle how to decode 8-bit string
6792 instances pickled by Python 2; these default to 'ASCII' and 'strict',
6793 respectively.  The *encoding* can be 'bytes' to read these 8-bit
6794 string instances as bytes objects.
6795 [clinic start generated code]*/
6796 
6797 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors)6798 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
6799                                 int fix_imports, const char *encoding,
6800                                 const char *errors)
6801 /*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/
6802 {
6803     _Py_IDENTIFIER(persistent_load);
6804 
6805     /* In case of multiple __init__() calls, clear previous content. */
6806     if (self->read != NULL)
6807         (void)Unpickler_clear(self);
6808 
6809     if (_Unpickler_SetInputStream(self, file) < 0)
6810         return -1;
6811 
6812     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
6813         return -1;
6814 
6815     self->fix_imports = fix_imports;
6816 
6817     if (init_method_ref((PyObject *)self, &PyId_persistent_load,
6818                         &self->pers_func, &self->pers_func_self) < 0)
6819     {
6820         return -1;
6821     }
6822 
6823     self->stack = (Pdata *)Pdata_New();
6824     if (self->stack == NULL)
6825         return -1;
6826 
6827     self->memo_size = 32;
6828     self->memo = _Unpickler_NewMemo(self->memo_size);
6829     if (self->memo == NULL)
6830         return -1;
6831 
6832     self->proto = 0;
6833 
6834     return 0;
6835 }
6836 
6837 
6838 /* Define a proxy object for the Unpickler's internal memo object. This is to
6839  * avoid breaking code like:
6840  *  unpickler.memo.clear()
6841  * and
6842  *  unpickler.memo = saved_memo
6843  * Is this a good idea? Not really, but we don't want to break code that uses
6844  * it. Note that we don't implement the entire mapping API here. This is
6845  * intentional, as these should be treated as black-box implementation details.
6846  *
6847  * We do, however, have to implement pickling/unpickling support because of
6848  * real-world code like cvs2svn.
6849  */
6850 
6851 /*[clinic input]
6852 _pickle.UnpicklerMemoProxy.clear
6853 
6854 Remove all items from memo.
6855 [clinic start generated code]*/
6856 
6857 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)6858 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
6859 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
6860 {
6861     _Unpickler_MemoCleanup(self->unpickler);
6862     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6863     if (self->unpickler->memo == NULL)
6864         return NULL;
6865     Py_RETURN_NONE;
6866 }
6867 
6868 /*[clinic input]
6869 _pickle.UnpicklerMemoProxy.copy
6870 
6871 Copy the memo to a new object.
6872 [clinic start generated code]*/
6873 
6874 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)6875 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
6876 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
6877 {
6878     size_t i;
6879     PyObject *new_memo = PyDict_New();
6880     if (new_memo == NULL)
6881         return NULL;
6882 
6883     for (i = 0; i < self->unpickler->memo_size; i++) {
6884         int status;
6885         PyObject *key, *value;
6886 
6887         value = self->unpickler->memo[i];
6888         if (value == NULL)
6889             continue;
6890 
6891         key = PyLong_FromSsize_t(i);
6892         if (key == NULL)
6893             goto error;
6894         status = PyDict_SetItem(new_memo, key, value);
6895         Py_DECREF(key);
6896         if (status < 0)
6897             goto error;
6898     }
6899     return new_memo;
6900 
6901 error:
6902     Py_DECREF(new_memo);
6903     return NULL;
6904 }
6905 
6906 /*[clinic input]
6907 _pickle.UnpicklerMemoProxy.__reduce__
6908 
6909 Implement pickling support.
6910 [clinic start generated code]*/
6911 
6912 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)6913 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
6914 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
6915 {
6916     PyObject *reduce_value;
6917     PyObject *constructor_args;
6918     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
6919     if (contents == NULL)
6920         return NULL;
6921 
6922     reduce_value = PyTuple_New(2);
6923     if (reduce_value == NULL) {
6924         Py_DECREF(contents);
6925         return NULL;
6926     }
6927     constructor_args = PyTuple_New(1);
6928     if (constructor_args == NULL) {
6929         Py_DECREF(contents);
6930         Py_DECREF(reduce_value);
6931         return NULL;
6932     }
6933     PyTuple_SET_ITEM(constructor_args, 0, contents);
6934     Py_INCREF((PyObject *)&PyDict_Type);
6935     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6936     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6937     return reduce_value;
6938 }
6939 
/* Method table of UnpicklerMemoProxy (installed as tp_methods of
   UnpicklerMemoProxyType): clear(), copy() and __reduce__(), followed by an
   all-NULL sentinel. */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
6946 
/* tp_dealloc of UnpicklerMemoProxy: untrack the proxy from the GC, drop its
   reference to the unpickler and free the proxy itself. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
6954 
/* tp_traverse of UnpicklerMemoProxy: the unpickler is the only object
   reference the proxy holds. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
6962 
/* tp_clear of UnpicklerMemoProxy: drop the reference to the unpickler and
   leave the member NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
6969 
/* Type object of the memo proxy handed out by the Unpickler `memo` getter.
   Instances are GC-tracked and unhashable; slots after tp_methods are left
   zero-initialised. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* tp_flags */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
7000 
7001 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7002 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7003 {
7004     UnpicklerMemoProxyObject *self;
7005 
7006     self = PyObject_GC_New(UnpicklerMemoProxyObject,
7007                            &UnpicklerMemoProxyType);
7008     if (self == NULL)
7009         return NULL;
7010     Py_INCREF(unpickler);
7011     self->unpickler = unpickler;
7012     PyObject_GC_Track(self);
7013     return (PyObject *)self;
7014 }
7015 
7016 /*****************************************************************************/
7017 
7018 
/* Getter for the `memo` attribute: every access returns a new
   UnpicklerMemoProxy bound to this unpickler (NULL if creating it fails). */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
{
    return UnpicklerMemoProxy_New(self);
}
7024 
7025 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7026 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7027 {
7028     PyObject **new_memo;
7029     size_t new_memo_size = 0;
7030 
7031     if (obj == NULL) {
7032         PyErr_SetString(PyExc_TypeError,
7033                         "attribute deletion is not supported");
7034         return -1;
7035     }
7036 
7037     if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7038         UnpicklerObject *unpickler =
7039             ((UnpicklerMemoProxyObject *)obj)->unpickler;
7040 
7041         new_memo_size = unpickler->memo_size;
7042         new_memo = _Unpickler_NewMemo(new_memo_size);
7043         if (new_memo == NULL)
7044             return -1;
7045 
7046         for (size_t i = 0; i < new_memo_size; i++) {
7047             Py_XINCREF(unpickler->memo[i]);
7048             new_memo[i] = unpickler->memo[i];
7049         }
7050     }
7051     else if (PyDict_Check(obj)) {
7052         Py_ssize_t i = 0;
7053         PyObject *key, *value;
7054 
7055         new_memo_size = PyDict_GET_SIZE(obj);
7056         new_memo = _Unpickler_NewMemo(new_memo_size);
7057         if (new_memo == NULL)
7058             return -1;
7059 
7060         while (PyDict_Next(obj, &i, &key, &value)) {
7061             Py_ssize_t idx;
7062             if (!PyLong_Check(key)) {
7063                 PyErr_SetString(PyExc_TypeError,
7064                                 "memo key must be integers");
7065                 goto error;
7066             }
7067             idx = PyLong_AsSsize_t(key);
7068             if (idx == -1 && PyErr_Occurred())
7069                 goto error;
7070             if (idx < 0) {
7071                 PyErr_SetString(PyExc_ValueError,
7072                                 "memo key must be positive integers.");
7073                 goto error;
7074             }
7075             if (_Unpickler_MemoPut(self, idx, value) < 0)
7076                 goto error;
7077         }
7078     }
7079     else {
7080         PyErr_Format(PyExc_TypeError,
7081                      "'memo' attribute must be an UnpicklerMemoProxy object "
7082                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7083         return -1;
7084     }
7085 
7086     _Unpickler_MemoCleanup(self);
7087     self->memo_size = new_memo_size;
7088     self->memo = new_memo;
7089 
7090     return 0;
7091 
7092   error:
7093     if (new_memo_size) {
7094         for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7095             Py_XDECREF(new_memo[i]);
7096         }
7097         PyMem_FREE(new_memo);
7098     }
7099     return -1;
7100 }
7101 
7102 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7103 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7104 {
7105     if (self->pers_func == NULL) {
7106         PyErr_SetString(PyExc_AttributeError, "persistent_load");
7107         return NULL;
7108     }
7109     return reconstruct_method(self->pers_func, self->pers_func_self);
7110 }
7111 
7112 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7113 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7114 {
7115     if (value == NULL) {
7116         PyErr_SetString(PyExc_TypeError,
7117                         "attribute deletion is not supported");
7118         return -1;
7119     }
7120     if (!PyCallable_Check(value)) {
7121         PyErr_SetString(PyExc_TypeError,
7122                         "persistent_load must be a callable taking "
7123                         "one argument");
7124         return -1;
7125     }
7126 
7127     self->pers_func_self = NULL;
7128     Py_INCREF(value);
7129     Py_XSETREF(self->pers_func, value);
7130 
7131     return 0;
7132 }
7133 
/* Computed attributes of Unpickler (installed as tp_getset of
   Unpickler_Type): `memo` and `persistent_load`, each with a getter and a
   setter, followed by a NULL sentinel. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}
};
7140 
/* Type object for _pickle.Unpickler.  It is a GC-enabled type that can be
   subclassed; the __init__ docstring doubles as the type's tp_doc. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /*tp_flags*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    _pickle_Unpickler___init____doc__,  /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Unpickler___init__,         /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
7183 
7184 /*[clinic input]
7185 
7186 _pickle.dump
7187 
7188   obj: object
7189   file: object
7190   protocol: object = NULL
7191   *
7192   fix_imports: bool = True
7193 
7194 Write a pickled representation of obj to the open file object file.
7195 
7196 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7197 be more efficient.
7198 
The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
7201 protocol is 3; a backward-incompatible protocol designed for Python 3.
7202 
7203 Specifying a negative protocol version selects the highest protocol
7204 version supported.  The higher the protocol used, the more recent the
7205 version of Python needed to read the pickle produced.
7206 
7207 The *file* argument must have a write() method that accepts a single
7208 bytes argument.  It can thus be a file object opened for binary
7209 writing, an io.BytesIO instance, or any other custom object that meets
7210 this interface.
7211 
7212 If *fix_imports* is True and protocol is less than 3, pickle will try
7213 to map the new Python 3 names to the old module names used in Python
7214 2, so that the pickle data stream is readable with Python 2.
7215 [clinic start generated code]*/
7216 
7217 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports)7218 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7219                   PyObject *protocol, int fix_imports)
7220 /*[clinic end generated code: output=a4774d5fde7d34de input=830f8a64cef6f042]*/
7221 {
7222     PicklerObject *pickler = _Pickler_New();
7223 
7224     if (pickler == NULL)
7225         return NULL;
7226 
7227     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7228         goto error;
7229 
7230     if (_Pickler_SetOutputStream(pickler, file) < 0)
7231         goto error;
7232 
7233     if (dump(pickler, obj) < 0)
7234         goto error;
7235 
7236     if (_Pickler_FlushToFile(pickler) < 0)
7237         goto error;
7238 
7239     Py_DECREF(pickler);
7240     Py_RETURN_NONE;
7241 
7242   error:
7243     Py_XDECREF(pickler);
7244     return NULL;
7245 }
7246 
7247 /*[clinic input]
7248 
7249 _pickle.dumps
7250 
7251   obj: object
7252   protocol: object = NULL
7253   *
7254   fix_imports: bool = True
7255 
7256 Return the pickled representation of the object as a bytes object.
7257 
7258 The optional *protocol* argument tells the pickler to use the given
7259 protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
7260 protocol is 3; a backward-incompatible protocol designed for Python 3.
7261 
7262 Specifying a negative protocol version selects the highest protocol
7263 version supported.  The higher the protocol used, the more recent the
7264 version of Python needed to read the pickle produced.
7265 
7266 If *fix_imports* is True and *protocol* is less than 3, pickle will
7267 try to map the new Python 3 names to the old module names used in
7268 Python 2, so that the pickle data stream is readable with Python 2.
7269 [clinic start generated code]*/
7270 
7271 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports)7272 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7273                    int fix_imports)
7274 /*[clinic end generated code: output=d75d5cda456fd261 input=293dbeda181580b7]*/
7275 {
7276     PyObject *result;
7277     PicklerObject *pickler = _Pickler_New();
7278 
7279     if (pickler == NULL)
7280         return NULL;
7281 
7282     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7283         goto error;
7284 
7285     if (dump(pickler, obj) < 0)
7286         goto error;
7287 
7288     result = _Pickler_GetString(pickler);
7289     Py_DECREF(pickler);
7290     return result;
7291 
7292   error:
7293     Py_XDECREF(pickler);
7294     return NULL;
7295 }
7296 
7297 /*[clinic input]
7298 
7299 _pickle.load
7300 
7301   file: object
7302   *
7303   fix_imports: bool = True
7304   encoding: str = 'ASCII'
7305   errors: str = 'strict'
7306 
7307 Read and return an object from the pickle data stored in a file.
7308 
7309 This is equivalent to ``Unpickler(file).load()``, but may be more
7310 efficient.
7311 
7312 The protocol version of the pickle is detected automatically, so no
7313 protocol argument is needed.  Bytes past the pickled object's
7314 representation are ignored.
7315 
7316 The argument *file* must have two methods, a read() method that takes
7317 an integer argument, and a readline() method that requires no
7318 arguments.  Both methods should return bytes.  Thus *file* can be a
7319 binary file object opened for reading, an io.BytesIO object, or any
7320 other custom object that meets this interface.
7321 
7322 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7323 which are used to control compatibility support for pickle stream
7324 generated by Python 2.  If *fix_imports* is True, pickle will try to
7325 map the old Python 2 names to the new names used in Python 3.  The
7326 *encoding* and *errors* tell pickle how to decode 8-bit string
7327 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7328 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7329 string instances as bytes objects.
7330 [clinic start generated code]*/
7331 
7332 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors)7333 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7334                   const char *encoding, const char *errors)
7335 /*[clinic end generated code: output=69e298160285199e input=01b44dd3fc07afa7]*/
7336 {
7337     PyObject *result;
7338     UnpicklerObject *unpickler = _Unpickler_New();
7339 
7340     if (unpickler == NULL)
7341         return NULL;
7342 
7343     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7344         goto error;
7345 
7346     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7347         goto error;
7348 
7349     unpickler->fix_imports = fix_imports;
7350 
7351     result = load(unpickler);
7352     Py_DECREF(unpickler);
7353     return result;
7354 
7355   error:
7356     Py_XDECREF(unpickler);
7357     return NULL;
7358 }
7359 
7360 /*[clinic input]
7361 
7362 _pickle.loads
7363 
7364   data: object
7365   *
7366   fix_imports: bool = True
7367   encoding: str = 'ASCII'
7368   errors: str = 'strict'
7369 
7370 Read and return an object from the given pickle data.
7371 
7372 The protocol version of the pickle is detected automatically, so no
7373 protocol argument is needed.  Bytes past the pickled object's
7374 representation are ignored.
7375 
7376 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7377 which are used to control compatibility support for pickle stream
7378 generated by Python 2.  If *fix_imports* is True, pickle will try to
7379 map the old Python 2 names to the new names used in Python 3.  The
7380 *encoding* and *errors* tell pickle how to decode 8-bit string
7381 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7382 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7383 string instances as bytes objects.
7384 [clinic start generated code]*/
7385 
7386 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors)7387 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7388                    const char *encoding, const char *errors)
7389 /*[clinic end generated code: output=1e7cb2343f2c440f input=70605948a719feb9]*/
7390 {
7391     PyObject *result;
7392     UnpicklerObject *unpickler = _Unpickler_New();
7393 
7394     if (unpickler == NULL)
7395         return NULL;
7396 
7397     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7398         goto error;
7399 
7400     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7401         goto error;
7402 
7403     unpickler->fix_imports = fix_imports;
7404 
7405     result = load(unpickler);
7406     Py_DECREF(unpickler);
7407     return result;
7408 
7409   error:
7410     Py_XDECREF(unpickler);
7411     return NULL;
7412 }
7413 
7414 static struct PyMethodDef pickle_methods[] = {
7415     _PICKLE_DUMP_METHODDEF
7416     _PICKLE_DUMPS_METHODDEF
7417     _PICKLE_LOAD_METHODDEF
7418     _PICKLE_LOADS_METHODDEF
7419     {NULL, NULL} /* sentinel */
7420 };
7421 
7422 static int
pickle_clear(PyObject * m)7423 pickle_clear(PyObject *m)
7424 {
7425     _Pickle_ClearState(_Pickle_GetState(m));
7426     return 0;
7427 }
7428 
7429 static void
pickle_free(PyObject * m)7430 pickle_free(PyObject *m)
7431 {
7432     _Pickle_ClearState(_Pickle_GetState(m));
7433 }
7434 
7435 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7436 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7437 {
7438     PickleState *st = _Pickle_GetState(m);
7439     Py_VISIT(st->PickleError);
7440     Py_VISIT(st->PicklingError);
7441     Py_VISIT(st->UnpicklingError);
7442     Py_VISIT(st->dispatch_table);
7443     Py_VISIT(st->extension_registry);
7444     Py_VISIT(st->extension_cache);
7445     Py_VISIT(st->inverted_registry);
7446     Py_VISIT(st->name_mapping_2to3);
7447     Py_VISIT(st->import_mapping_2to3);
7448     Py_VISIT(st->name_mapping_3to2);
7449     Py_VISIT(st->import_mapping_3to2);
7450     Py_VISIT(st->codecs_encode);
7451     Py_VISIT(st->getattr);
7452     Py_VISIT(st->partial);
7453     return 0;
7454 }
7455 
7456 static struct PyModuleDef _picklemodule = {
7457     PyModuleDef_HEAD_INIT,
7458     "_pickle",            /* m_name */
7459     pickle_module_doc,    /* m_doc */
7460     sizeof(PickleState),  /* m_size */
7461     pickle_methods,       /* m_methods */
7462     NULL,                 /* m_reload */
7463     pickle_traverse,      /* m_traverse */
7464     pickle_clear,         /* m_clear */
7465     (freefunc)pickle_free /* m_free */
7466 };
7467 
7468 PyMODINIT_FUNC
PyInit__pickle(void)7469 PyInit__pickle(void)
7470 {
7471     PyObject *m;
7472     PickleState *st;
7473 
7474     m = PyState_FindModule(&_picklemodule);
7475     if (m) {
7476         Py_INCREF(m);
7477         return m;
7478     }
7479 
7480     if (PyType_Ready(&Unpickler_Type) < 0)
7481         return NULL;
7482     if (PyType_Ready(&Pickler_Type) < 0)
7483         return NULL;
7484     if (PyType_Ready(&Pdata_Type) < 0)
7485         return NULL;
7486     if (PyType_Ready(&PicklerMemoProxyType) < 0)
7487         return NULL;
7488     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7489         return NULL;
7490 
7491     /* Create the module and add the functions. */
7492     m = PyModule_Create(&_picklemodule);
7493     if (m == NULL)
7494         return NULL;
7495 
7496     Py_INCREF(&Pickler_Type);
7497     if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7498         return NULL;
7499     Py_INCREF(&Unpickler_Type);
7500     if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7501         return NULL;
7502 
7503     st = _Pickle_GetState(m);
7504 
7505     /* Initialize the exceptions. */
7506     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7507     if (st->PickleError == NULL)
7508         return NULL;
7509     st->PicklingError = \
7510         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7511     if (st->PicklingError == NULL)
7512         return NULL;
7513     st->UnpicklingError = \
7514         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7515     if (st->UnpicklingError == NULL)
7516         return NULL;
7517 
7518     Py_INCREF(st->PickleError);
7519     if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
7520         return NULL;
7521     Py_INCREF(st->PicklingError);
7522     if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
7523         return NULL;
7524     Py_INCREF(st->UnpicklingError);
7525     if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
7526         return NULL;
7527 
7528     if (_Pickle_InitState(st) < 0)
7529         return NULL;
7530 
7531     return m;
7532 }
7533