/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6 
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 #  error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10 
11 #include "Python.h"
12 #include "structmember.h"
13 
/* Docstring text for the module, stored under the name pickle_module_doc.
   NOTE(review): presumably wired into the module definition later in the
   file; that definition is outside this view. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
16 
17 /*[clinic input]
18 module _pickle
19 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
25 
/* Protocol version limits.

   DEFAULT_PROTOCOL: bump only once the oldest still supported version of
   Python already includes the protocol.
   HIGHEST_PROTOCOL: bump whenever new opcodes are added to the pickle
   protocol. */
enum {
    DEFAULT_PROTOCOL = 4,
    HIGHEST_PROTOCOL = 5
};
33 
/* Pickle opcodes.  This list must stay in step with pickle.py; the
   extensive per-opcode documentation lives in pickletools.py.

   The order below is unchanged on purpose so it can be compared against
   pickle.py line by line.  The trailing comments give each character's
   byte value (assuming an ASCII execution character set). */
enum opcode {
    MARK            = '(',      /* 0x28 */
    STOP            = '.',      /* 0x2e */
    POP             = '0',      /* 0x30 */
    POP_MARK        = '1',      /* 0x31 */
    DUP             = '2',      /* 0x32 */
    FLOAT           = 'F',      /* 0x46 */
    INT             = 'I',      /* 0x49 */
    BININT          = 'J',      /* 0x4a */
    BININT1         = 'K',      /* 0x4b */
    LONG            = 'L',      /* 0x4c */
    BININT2         = 'M',      /* 0x4d */
    NONE            = 'N',      /* 0x4e */
    PERSID          = 'P',      /* 0x50 */
    BINPERSID       = 'Q',      /* 0x51 */
    REDUCE          = 'R',      /* 0x52 */
    STRING          = 'S',      /* 0x53 */
    BINSTRING       = 'T',      /* 0x54 */
    SHORT_BINSTRING = 'U',      /* 0x55 */
    UNICODE         = 'V',      /* 0x56 */
    BINUNICODE      = 'X',      /* 0x58 */
    APPEND          = 'a',      /* 0x61 */
    BUILD           = 'b',      /* 0x62 */
    GLOBAL          = 'c',      /* 0x63 */
    DICT            = 'd',      /* 0x64 */
    EMPTY_DICT      = '}',      /* 0x7d */
    APPENDS         = 'e',      /* 0x65 */
    GET             = 'g',      /* 0x67 */
    BINGET          = 'h',      /* 0x68 */
    INST            = 'i',      /* 0x69 */
    LONG_BINGET     = 'j',      /* 0x6a */
    LIST            = 'l',      /* 0x6c */
    EMPTY_LIST      = ']',      /* 0x5d */
    OBJ             = 'o',      /* 0x6f */
    PUT             = 'p',      /* 0x70 */
    BINPUT          = 'q',      /* 0x71 */
    LONG_BINPUT     = 'r',      /* 0x72 */
    SETITEM         = 's',      /* 0x73 */
    TUPLE           = 't',      /* 0x74 */
    EMPTY_TUPLE     = ')',      /* 0x29 */
    SETITEMS        = 'u',      /* 0x75 */
    BINFLOAT        = 'G',      /* 0x47 */

    /* Added in protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Added in protocol 3 (Python 3.x). */
    BINBYTES        = 'B',      /* 0x42 */
    SHORT_BINBYTES  = 'C',      /* 0x43 */

    /* Added in protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Added in protocol 5. */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
114 
/* Sizes and limits used by the pickler and unpickler. */
enum {
    /* How many elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Mirrors pickle.Pickler._BATCHSIZE; nothing breaks
       if the two values drift apart, but it is unclear that a difference
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth after which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 16 * 8192,

    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
136 
137 /*************************************************************************/
138 
139 /* State of the pickle module, per PEP 3121. */
140 typedef struct {
141     /* Exception classes for pickle. */
142     PyObject *PickleError;
143     PyObject *PicklingError;
144     PyObject *UnpicklingError;
145 
146     /* copyreg.dispatch_table, {type_object: pickling_function} */
147     PyObject *dispatch_table;
148 
149     /* For the extension opcodes EXT1, EXT2 and EXT4. */
150 
151     /* copyreg._extension_registry, {(module_name, function_name): code} */
152     PyObject *extension_registry;
153     /* copyreg._extension_cache, {code: object} */
154     PyObject *extension_cache;
155     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
156     PyObject *inverted_registry;
157 
158     /* Import mappings for compatibility with Python 2.x */
159 
160     /* _compat_pickle.NAME_MAPPING,
161        {(oldmodule, oldname): (newmodule, newname)} */
162     PyObject *name_mapping_2to3;
163     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
164     PyObject *import_mapping_2to3;
165     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
166     PyObject *name_mapping_3to2;
167     PyObject *import_mapping_3to2;
168 
169     /* codecs.encode, used for saving bytes in older protocols */
170     PyObject *codecs_encode;
171     /* builtins.getattr, used for saving nested names with protocol < 4 */
172     PyObject *getattr;
173     /* functools.partial, used for implementing __newobj_ex__ with protocols
174        2 and 3 */
175     PyObject *partial;
176 } PickleState;
177 
/* Forward declaration of the _pickle module definition.  It is needed here
   because _Pickle_GetGlobalState() below takes its address; the definition
   itself is not in this part of the file. */
static struct PyModuleDef _picklemodule;
180 
181 /* Given a module object, get its per-module state. */
182 static PickleState *
_Pickle_GetState(PyObject * module)183 _Pickle_GetState(PyObject *module)
184 {
185     return (PickleState *)PyModule_GetState(module);
186 }
187 
188 /* Find the module instance imported in the currently running sub-interpreter
189    and get its state. */
190 static PickleState *
_Pickle_GetGlobalState(void)191 _Pickle_GetGlobalState(void)
192 {
193     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194 }
195 
196 /* Clear the given pickle module state. */
197 static void
_Pickle_ClearState(PickleState * st)198 _Pickle_ClearState(PickleState *st)
199 {
200     Py_CLEAR(st->PickleError);
201     Py_CLEAR(st->PicklingError);
202     Py_CLEAR(st->UnpicklingError);
203     Py_CLEAR(st->dispatch_table);
204     Py_CLEAR(st->extension_registry);
205     Py_CLEAR(st->extension_cache);
206     Py_CLEAR(st->inverted_registry);
207     Py_CLEAR(st->name_mapping_2to3);
208     Py_CLEAR(st->import_mapping_2to3);
209     Py_CLEAR(st->name_mapping_3to2);
210     Py_CLEAR(st->import_mapping_3to2);
211     Py_CLEAR(st->codecs_encode);
212     Py_CLEAR(st->getattr);
213     Py_CLEAR(st->partial);
214 }
215 
216 /* Initialize the given pickle module state. */
217 static int
_Pickle_InitState(PickleState * st)218 _Pickle_InitState(PickleState *st)
219 {
220     PyObject *copyreg = NULL;
221     PyObject *compat_pickle = NULL;
222     PyObject *codecs = NULL;
223     PyObject *functools = NULL;
224     _Py_IDENTIFIER(getattr);
225 
226     st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
227     if (st->getattr == NULL)
228         goto error;
229 
230     copyreg = PyImport_ImportModule("copyreg");
231     if (!copyreg)
232         goto error;
233     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234     if (!st->dispatch_table)
235         goto error;
236     if (!PyDict_CheckExact(st->dispatch_table)) {
237         PyErr_Format(PyExc_RuntimeError,
238                      "copyreg.dispatch_table should be a dict, not %.200s",
239                      Py_TYPE(st->dispatch_table)->tp_name);
240         goto error;
241     }
242     st->extension_registry = \
243         PyObject_GetAttrString(copyreg, "_extension_registry");
244     if (!st->extension_registry)
245         goto error;
246     if (!PyDict_CheckExact(st->extension_registry)) {
247         PyErr_Format(PyExc_RuntimeError,
248                      "copyreg._extension_registry should be a dict, "
249                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250         goto error;
251     }
252     st->inverted_registry = \
253         PyObject_GetAttrString(copyreg, "_inverted_registry");
254     if (!st->inverted_registry)
255         goto error;
256     if (!PyDict_CheckExact(st->inverted_registry)) {
257         PyErr_Format(PyExc_RuntimeError,
258                      "copyreg._inverted_registry should be a dict, "
259                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260         goto error;
261     }
262     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263     if (!st->extension_cache)
264         goto error;
265     if (!PyDict_CheckExact(st->extension_cache)) {
266         PyErr_Format(PyExc_RuntimeError,
267                      "copyreg._extension_cache should be a dict, "
268                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269         goto error;
270     }
271     Py_CLEAR(copyreg);
272 
273     /* Load the 2.x -> 3.x stdlib module mapping tables */
274     compat_pickle = PyImport_ImportModule("_compat_pickle");
275     if (!compat_pickle)
276         goto error;
277     st->name_mapping_2to3 = \
278         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279     if (!st->name_mapping_2to3)
280         goto error;
281     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282         PyErr_Format(PyExc_RuntimeError,
283                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284                      Py_TYPE(st->name_mapping_2to3)->tp_name);
285         goto error;
286     }
287     st->import_mapping_2to3 = \
288         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289     if (!st->import_mapping_2to3)
290         goto error;
291     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292         PyErr_Format(PyExc_RuntimeError,
293                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
294                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295         goto error;
296     }
297     /* ... and the 3.x -> 2.x mapping tables */
298     st->name_mapping_3to2 = \
299         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300     if (!st->name_mapping_3to2)
301         goto error;
302     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303         PyErr_Format(PyExc_RuntimeError,
304                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306         goto error;
307     }
308     st->import_mapping_3to2 = \
309         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310     if (!st->import_mapping_3to2)
311         goto error;
312     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313         PyErr_Format(PyExc_RuntimeError,
314                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316         goto error;
317     }
318     Py_CLEAR(compat_pickle);
319 
320     codecs = PyImport_ImportModule("codecs");
321     if (codecs == NULL)
322         goto error;
323     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324     if (st->codecs_encode == NULL) {
325         goto error;
326     }
327     if (!PyCallable_Check(st->codecs_encode)) {
328         PyErr_Format(PyExc_RuntimeError,
329                      "codecs.encode should be a callable, not %.200s",
330                      Py_TYPE(st->codecs_encode)->tp_name);
331         goto error;
332     }
333     Py_CLEAR(codecs);
334 
335     functools = PyImport_ImportModule("functools");
336     if (!functools)
337         goto error;
338     st->partial = PyObject_GetAttrString(functools, "partial");
339     if (!st->partial)
340         goto error;
341     Py_CLEAR(functools);
342 
343     return 0;
344 
345   error:
346     Py_CLEAR(copyreg);
347     Py_CLEAR(compat_pickle);
348     Py_CLEAR(codecs);
349     Py_CLEAR(functools);
350     _Pickle_ClearState(st);
351     return -1;
352 }
353 
354 /* Helper for calling a function with a single argument quickly.
355 
356    This function steals the reference of the given argument. */
357 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)358 _Pickle_FastCall(PyObject *func, PyObject *obj)
359 {
360     PyObject *result;
361 
362     result = PyObject_CallFunctionObjArgs(func, obj, NULL);
363     Py_DECREF(obj);
364     return result;
365 }
366 
367 /*************************************************************************/
368 
369 /* Retrieve and deconstruct a method for avoiding a reference cycle
370    (pickler -> bound method of pickler -> pickler) */
371 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)372 init_method_ref(PyObject *self, _Py_Identifier *name,
373                 PyObject **method_func, PyObject **method_self)
374 {
375     PyObject *func, *func2;
376     int ret;
377 
378     /* *method_func and *method_self should be consistent.  All refcount decrements
379        should be occurred after setting *method_self and *method_func. */
380     ret = _PyObject_LookupAttrId(self, name, &func);
381     if (func == NULL) {
382         *method_self = NULL;
383         Py_CLEAR(*method_func);
384         return ret;
385     }
386 
387     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388         /* Deconstruct a bound Python method */
389         func2 = PyMethod_GET_FUNCTION(func);
390         Py_INCREF(func2);
391         *method_self = self; /* borrowed */
392         Py_XSETREF(*method_func, func2);
393         Py_DECREF(func);
394         return 0;
395     }
396     else {
397         *method_self = NULL;
398         Py_XSETREF(*method_func, func);
399         return 0;
400     }
401 }
402 
403 /* Bind a method if it was deconstructed */
404 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)405 reconstruct_method(PyObject *func, PyObject *self)
406 {
407     if (self) {
408         return PyMethod_New(func, self);
409     }
410     else {
411         Py_INCREF(func);
412         return func;
413     }
414 }
415 
416 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)417 call_method(PyObject *func, PyObject *self, PyObject *obj)
418 {
419     if (self) {
420         return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421     }
422     else {
423         return PyObject_CallFunctionObjArgs(func, obj, NULL);
424     }
425 }
426 
427 /*************************************************************************/
428 
429 /* Internal data type used as the unpickling stack. */
430 typedef struct {
431     PyObject_VAR_HEAD
432     PyObject **data;
433     int mark_set;          /* is MARK set? */
434     Py_ssize_t fence;      /* position of top MARK or 0 */
435     Py_ssize_t allocated;  /* number of slots in data allocated */
436 } Pdata;
437 
438 static void
Pdata_dealloc(Pdata * self)439 Pdata_dealloc(Pdata *self)
440 {
441     Py_ssize_t i = Py_SIZE(self);
442     while (--i >= 0) {
443         Py_DECREF(self->data[i]);
444     }
445     PyMem_FREE(self->data);
446     PyObject_Del(self);
447 }
448 
449 static PyTypeObject Pdata_Type = {
450     PyVarObject_HEAD_INIT(NULL, 0)
451     "_pickle.Pdata",              /*tp_name*/
452     sizeof(Pdata),                /*tp_basicsize*/
453     sizeof(PyObject *),           /*tp_itemsize*/
454     (destructor)Pdata_dealloc,    /*tp_dealloc*/
455 };
456 
457 static PyObject *
Pdata_New(void)458 Pdata_New(void)
459 {
460     Pdata *self;
461 
462     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463         return NULL;
464     Py_SIZE(self) = 0;
465     self->mark_set = 0;
466     self->fence = 0;
467     self->allocated = 8;
468     self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
469     if (self->data)
470         return (PyObject *)self;
471     Py_DECREF(self);
472     return PyErr_NoMemory();
473 }
474 
475 
476 /* Retain only the initial clearto items.  If clearto >= the current
477  * number of items, this is a (non-erroneous) NOP.
478  */
479 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)480 Pdata_clear(Pdata *self, Py_ssize_t clearto)
481 {
482     Py_ssize_t i = Py_SIZE(self);
483 
484     assert(clearto >= self->fence);
485     if (clearto >= i)
486         return 0;
487 
488     while (--i >= clearto) {
489         Py_CLEAR(self->data[i]);
490     }
491     Py_SIZE(self) = clearto;
492     return 0;
493 }
494 
495 static int
Pdata_grow(Pdata * self)496 Pdata_grow(Pdata *self)
497 {
498     PyObject **data = self->data;
499     size_t allocated = (size_t)self->allocated;
500     size_t new_allocated;
501 
502     new_allocated = (allocated >> 3) + 6;
503     /* check for integer overflow */
504     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
505         goto nomemory;
506     new_allocated += allocated;
507     PyMem_RESIZE(data, PyObject *, new_allocated);
508     if (data == NULL)
509         goto nomemory;
510 
511     self->data = data;
512     self->allocated = (Py_ssize_t)new_allocated;
513     return 0;
514 
515   nomemory:
516     PyErr_NoMemory();
517     return -1;
518 }
519 
520 static int
Pdata_stack_underflow(Pdata * self)521 Pdata_stack_underflow(Pdata *self)
522 {
523     PickleState *st = _Pickle_GetGlobalState();
524     PyErr_SetString(st->UnpicklingError,
525                     self->mark_set ?
526                     "unexpected MARK found" :
527                     "unpickling stack underflow");
528     return -1;
529 }
530 
531 /* D is a Pdata*.  Pop the topmost element and store it into V, which
532  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
533  * is raised and V is set to NULL.
534  */
535 static PyObject *
Pdata_pop(Pdata * self)536 Pdata_pop(Pdata *self)
537 {
538     if (Py_SIZE(self) <= self->fence) {
539         Pdata_stack_underflow(self);
540         return NULL;
541     }
542     return self->data[--Py_SIZE(self)];
543 }
544 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
545 
546 static int
Pdata_push(Pdata * self,PyObject * obj)547 Pdata_push(Pdata *self, PyObject *obj)
548 {
549     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
550         return -1;
551     }
552     self->data[Py_SIZE(self)++] = obj;
553     return 0;
554 }
555 
/* Push an object on stack, transferring its ownership to the stack.

   D is a Pdata*, O the object, ER the value to return from the *enclosing*
   function if the push fails.  O is evaluated exactly once.

   Pdata_push() does not touch reference counts, and a failed push returns
   from the caller immediately, so the caller never gets a chance to release
   the object.  The macro therefore drops the reference itself on failure;
   otherwise it would leak. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_obj_ = (O);                             \
        if (Pdata_push((D), pdata_obj_) < 0) {                  \
            Py_XDECREF(pdata_obj_);                             \
            return (ER);                                        \
        } } while(0)

/* Push an object on stack, adding a new reference to the object.

   Same arguments as PDATA_PUSH.  If the push fails, the reference added
   here is removed again before returning ER, so the caller's own reference
   count is unchanged. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_obj_ = (O);                             \
        Py_INCREF(pdata_obj_);                                  \
        if (Pdata_push((D), pdata_obj_) < 0) {                  \
            Py_DECREF(pdata_obj_);                              \
            return (ER);                                        \
        } } while(0)
564 
565 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)566 Pdata_poptuple(Pdata *self, Py_ssize_t start)
567 {
568     PyObject *tuple;
569     Py_ssize_t len, i, j;
570 
571     if (start < self->fence) {
572         Pdata_stack_underflow(self);
573         return NULL;
574     }
575     len = Py_SIZE(self) - start;
576     tuple = PyTuple_New(len);
577     if (tuple == NULL)
578         return NULL;
579     for (i = start, j = 0; j < len; i++, j++)
580         PyTuple_SET_ITEM(tuple, j, self->data[i]);
581 
582     Py_SIZE(self) = start;
583     return tuple;
584 }
585 
586 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)587 Pdata_poplist(Pdata *self, Py_ssize_t start)
588 {
589     PyObject *list;
590     Py_ssize_t len, i, j;
591 
592     len = Py_SIZE(self) - start;
593     list = PyList_New(len);
594     if (list == NULL)
595         return NULL;
596     for (i = start, j = 0; j < len; i++, j++)
597         PyList_SET_ITEM(list, j, self->data[i]);
598 
599     Py_SIZE(self) = start;
600     return list;
601 }
602 
603 typedef struct {
604     PyObject *me_key;
605     Py_ssize_t me_value;
606 } PyMemoEntry;
607 
608 typedef struct {
609     size_t mt_mask;
610     size_t mt_used;
611     size_t mt_allocated;
612     PyMemoEntry *mt_table;
613 } PyMemoTable;
614 
615 typedef struct PicklerObject {
616     PyObject_HEAD
617     PyMemoTable *memo;          /* Memo table, keep track of the seen
618                                    objects to support self-referential objects
619                                    pickling. */
620     PyObject *pers_func;        /* persistent_id() method, can be NULL */
621     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
622                                    is an unbound method, NULL otherwise */
623     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
624     PyObject *reducer_override; /* hook for invoking user-defined callbacks
625                                    instead of save_global when pickling
626                                    functions and classes*/
627 
628     PyObject *write;            /* write() method of the output stream. */
629     PyObject *output_buffer;    /* Write into a local bytearray buffer before
630                                    flushing to the stream. */
631     Py_ssize_t output_len;      /* Length of output_buffer. */
632     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
633     int proto;                  /* Pickle protocol number, >= 0 */
634     int bin;                    /* Boolean, true if proto > 0 */
635     int framing;                /* True when framing is enabled, proto >= 4 */
636     Py_ssize_t frame_start;     /* Position in output_buffer where the
637                                    current frame begins. -1 if there
638                                    is no frame currently open. */
639 
640     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
641     int fast;                   /* Enable fast mode if set to a true value.
642                                    The fast mode disable the usage of memo,
643                                    therefore speeding the pickling process by
644                                    not generating superfluous PUT opcodes. It
645                                    should not be used if with self-referential
646                                    objects. */
647     int fast_nesting;
648     int fix_imports;            /* Indicate whether Pickler should fix
649                                    the name of globals for Python 2.x. */
650     PyObject *fast_memo;
651     PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
652 } PicklerObject;
653 
654 typedef struct UnpicklerObject {
655     PyObject_HEAD
656     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
657 
658     /* The unpickler memo is just an array of PyObject *s. Using a dict
659        is unnecessary, since the keys are contiguous ints. */
660     PyObject **memo;
661     size_t memo_size;       /* Capacity of the memo array */
662     size_t memo_len;        /* Number of objects in the memo */
663 
664     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
665     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
666                                    is an unbound method, NULL otherwise */
667 
668     Py_buffer buffer;
669     char *input_buffer;
670     char *input_line;
671     Py_ssize_t input_len;
672     Py_ssize_t next_read_idx;
673     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
674 
675     PyObject *read;             /* read() method of the input stream. */
676     PyObject *readinto;         /* readinto() method of the input stream. */
677     PyObject *readline;         /* readline() method of the input stream. */
678     PyObject *peek;             /* peek() method of the input stream, or NULL */
679     PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
680 
681     char *encoding;             /* Name of the encoding to be used for
682                                    decoding strings pickled using Python
683                                    2.x. The default value is "ASCII" */
684     char *errors;               /* Name of errors handling scheme to used when
685                                    decoding strings. The default value is
686                                    "strict". */
687     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
688                                    objects. */
689     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
690     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
691     int proto;                  /* Protocol of the pickle loaded. */
692     int fix_imports;            /* Indicate whether Unpickler should fix
693                                    the name of globals pickled by Python 2.x. */
694 } UnpicklerObject;
695 
696 typedef struct {
697     PyObject_HEAD
698     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
699 }  PicklerMemoProxyObject;
700 
701 typedef struct {
702     PyObject_HEAD
703     UnpicklerObject *unpickler;
704 } UnpicklerMemoProxyObject;
705 
/* Forward declarations.  These two functions and two type objects are
   declared here so that code placed before their definitions can refer to
   them; the definitions themselves are not in this part of the file. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
711 
712 #include "clinic/_pickle.c.h"
713 
/*************************************************************************
 A custom hashtable mapping void* to Python ints. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created table, and the smallest size a
   resize will ever pick. */
#define MT_MINSIZE 8
/* Number of bits shifted out of the perturbation value after each probe in
   _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
722 
723 
724 static PyMemoTable *
PyMemoTable_New(void)725 PyMemoTable_New(void)
726 {
727     PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
728     if (memo == NULL) {
729         PyErr_NoMemory();
730         return NULL;
731     }
732 
733     memo->mt_used = 0;
734     memo->mt_allocated = MT_MINSIZE;
735     memo->mt_mask = MT_MINSIZE - 1;
736     memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
737     if (memo->mt_table == NULL) {
738         PyMem_FREE(memo);
739         PyErr_NoMemory();
740         return NULL;
741     }
742     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
743 
744     return memo;
745 }
746 
747 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)748 PyMemoTable_Copy(PyMemoTable *self)
749 {
750     PyMemoTable *new = PyMemoTable_New();
751     if (new == NULL)
752         return NULL;
753 
754     new->mt_used = self->mt_used;
755     new->mt_allocated = self->mt_allocated;
756     new->mt_mask = self->mt_mask;
757     /* The table we get from _New() is probably smaller than we wanted.
758        Free it and allocate one that's the right size. */
759     PyMem_FREE(new->mt_table);
760     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
761     if (new->mt_table == NULL) {
762         PyMem_FREE(new);
763         PyErr_NoMemory();
764         return NULL;
765     }
766     for (size_t i = 0; i < self->mt_allocated; i++) {
767         Py_XINCREF(self->mt_table[i].me_key);
768     }
769     memcpy(new->mt_table, self->mt_table,
770            sizeof(PyMemoEntry) * self->mt_allocated);
771 
772     return new;
773 }
774 
775 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)776 PyMemoTable_Size(PyMemoTable *self)
777 {
778     return self->mt_used;
779 }
780 
781 static int
PyMemoTable_Clear(PyMemoTable * self)782 PyMemoTable_Clear(PyMemoTable *self)
783 {
784     Py_ssize_t i = self->mt_allocated;
785 
786     while (--i >= 0) {
787         Py_XDECREF(self->mt_table[i].me_key);
788     }
789     self->mt_used = 0;
790     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
791     return 0;
792 }
793 
794 static void
PyMemoTable_Del(PyMemoTable * self)795 PyMemoTable_Del(PyMemoTable *self)
796 {
797     if (self == NULL)
798         return;
799     PyMemoTable_Clear(self);
800 
801     PyMem_FREE(self->mt_table);
802     PyMem_FREE(self);
803 }
804 
805 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
806    can be considerably simpler than dictobject.c's lookdict(). */
807 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)808 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
809 {
810     size_t i;
811     size_t perturb;
812     size_t mask = self->mt_mask;
813     PyMemoEntry *table = self->mt_table;
814     PyMemoEntry *entry;
815     Py_hash_t hash = (Py_hash_t)key >> 3;
816 
817     i = hash & mask;
818     entry = &table[i];
819     if (entry->me_key == NULL || entry->me_key == key)
820         return entry;
821 
822     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
823         i = (i << 2) + i + perturb + 1;
824         entry = &table[i & mask];
825         if (entry->me_key == NULL || entry->me_key == key)
826             return entry;
827     }
828     Py_UNREACHABLE();
829 }
830 
831 /* Returns -1 on failure, 0 on success. */
832 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)833 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
834 {
835     PyMemoEntry *oldtable = NULL;
836     PyMemoEntry *oldentry, *newentry;
837     size_t new_size = MT_MINSIZE;
838     size_t to_process;
839 
840     assert(min_size > 0);
841 
842     if (min_size > PY_SSIZE_T_MAX) {
843         PyErr_NoMemory();
844         return -1;
845     }
846 
847     /* Find the smallest valid table size >= min_size. */
848     while (new_size < min_size) {
849         new_size <<= 1;
850     }
851     /* new_size needs to be a power of two. */
852     assert((new_size & (new_size - 1)) == 0);
853 
854     /* Allocate new table. */
855     oldtable = self->mt_table;
856     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
857     if (self->mt_table == NULL) {
858         self->mt_table = oldtable;
859         PyErr_NoMemory();
860         return -1;
861     }
862     self->mt_allocated = new_size;
863     self->mt_mask = new_size - 1;
864     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
865 
866     /* Copy entries from the old table. */
867     to_process = self->mt_used;
868     for (oldentry = oldtable; to_process > 0; oldentry++) {
869         if (oldentry->me_key != NULL) {
870             to_process--;
871             /* newentry is a pointer to a chunk of the new
872                mt_table, so we're setting the key:value pair
873                in-place. */
874             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
875             newentry->me_key = oldentry->me_key;
876             newentry->me_value = oldentry->me_value;
877         }
878     }
879 
880     /* Deallocate the old table. */
881     PyMem_FREE(oldtable);
882     return 0;
883 }
884 
885 /* Returns NULL on failure, a pointer to the value otherwise. */
886 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)887 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
888 {
889     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
890     if (entry->me_key == NULL)
891         return NULL;
892     return &entry->me_value;
893 }
894 
895 /* Returns -1 on failure, 0 on success. */
896 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)897 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
898 {
899     PyMemoEntry *entry;
900 
901     assert(key != NULL);
902 
903     entry = _PyMemoTable_Lookup(self, key);
904     if (entry->me_key != NULL) {
905         entry->me_value = value;
906         return 0;
907     }
908     Py_INCREF(key);
909     entry->me_key = key;
910     entry->me_value = value;
911     self->mt_used++;
912 
913     /* If we added a key, we can safely resize. Otherwise just return!
914      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
915      *
916      * Quadrupling the size improves average table sparseness
917      * (reducing collisions) at the cost of some memory. It also halves
918      * the number of expensive resize operations in a growing memo table.
919      *
920      * Very large memo tables (over 50K items) use doubling instead.
921      * This may help applications with severe memory constraints.
922      */
923     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
924         return 0;
925     }
926     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
927     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
928     return _PyMemoTable_ResizeTable(self, desired_size);
929 }
930 
/* Drop the memo-table helper macros so they do not leak into the rest of the
   file (presumably they are only needed by the memo-table code above). */
#undef MT_MINSIZE
#undef PERTURB_SHIFT
933 
934 /*************************************************************************/
935 
936 
937 static int
_Pickler_ClearBuffer(PicklerObject * self)938 _Pickler_ClearBuffer(PicklerObject *self)
939 {
940     Py_XSETREF(self->output_buffer,
941               PyBytes_FromStringAndSize(NULL, self->max_output_len));
942     if (self->output_buffer == NULL)
943         return -1;
944     self->output_len = 0;
945     self->frame_start = -1;
946     return 0;
947 }
948 
/* Store `value` into out[0..7] as an 8-byte little-endian integer.
 *
 * The low sizeof(size_t) bytes come from `value`; when size_t is narrower
 * than 8 bytes the remaining bytes are zero-filled. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos = 0;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Emit the least significant byte first, then shift it away. */
    while (pos < sizeof(size_t)) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
        pos++;
    }
    /* Pad up to 8 bytes. */
    while (pos < 8) {
        out[pos] = 0;
        pos++;
    }
}
963 
964 static int
_Pickler_CommitFrame(PicklerObject * self)965 _Pickler_CommitFrame(PicklerObject *self)
966 {
967     size_t frame_len;
968     char *qdata;
969 
970     if (!self->framing || self->frame_start == -1)
971         return 0;
972     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
973     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
974     if (frame_len >= FRAME_SIZE_MIN) {
975         qdata[0] = FRAME;
976         _write_size64(qdata + 1, frame_len);
977     }
978     else {
979         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
980         self->output_len -= FRAME_HEADER_SIZE;
981     }
982     self->frame_start = -1;
983     return 0;
984 }
985 
986 static PyObject *
_Pickler_GetString(PicklerObject * self)987 _Pickler_GetString(PicklerObject *self)
988 {
989     PyObject *output_buffer = self->output_buffer;
990 
991     assert(self->output_buffer != NULL);
992 
993     if (_Pickler_CommitFrame(self))
994         return NULL;
995 
996     self->output_buffer = NULL;
997     /* Resize down to exact size */
998     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
999         return NULL;
1000     return output_buffer;
1001 }
1002 
1003 static int
_Pickler_FlushToFile(PicklerObject * self)1004 _Pickler_FlushToFile(PicklerObject *self)
1005 {
1006     PyObject *output, *result;
1007 
1008     assert(self->write != NULL);
1009 
1010     /* This will commit the frame first */
1011     output = _Pickler_GetString(self);
1012     if (output == NULL)
1013         return -1;
1014 
1015     result = _Pickle_FastCall(self->write, output);
1016     Py_XDECREF(result);
1017     return (result == NULL) ? -1 : 0;
1018 }
1019 
1020 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1021 _Pickler_OpcodeBoundary(PicklerObject *self)
1022 {
1023     Py_ssize_t frame_len;
1024 
1025     if (!self->framing || self->frame_start == -1) {
1026         return 0;
1027     }
1028     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1029     if (frame_len >= FRAME_SIZE_TARGET) {
1030         if(_Pickler_CommitFrame(self)) {
1031             return -1;
1032         }
1033         /* Flush the content of the committed frame to the underlying
1034          * file and reuse the pickler buffer for the next frame so as
1035          * to limit memory usage when dumping large complex objects to
1036          * a file.
1037          *
1038          * self->write is NULL when called via dumps.
1039          */
1040         if (self->write != NULL) {
1041             if (_Pickler_FlushToFile(self) < 0) {
1042                 return -1;
1043             }
1044             if (_Pickler_ClearBuffer(self) < 0) {
1045                 return -1;
1046             }
1047         }
1048     }
1049     return 0;
1050 }
1051 
1052 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1053 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1054 {
1055     Py_ssize_t i, n, required;
1056     char *buffer;
1057     int need_new_frame;
1058 
1059     assert(s != NULL);
1060     need_new_frame = (self->framing && self->frame_start == -1);
1061 
1062     if (need_new_frame)
1063         n = data_len + FRAME_HEADER_SIZE;
1064     else
1065         n = data_len;
1066 
1067     required = self->output_len + n;
1068     if (required > self->max_output_len) {
1069         /* Make place in buffer for the pickle chunk */
1070         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1071             PyErr_NoMemory();
1072             return -1;
1073         }
1074         self->max_output_len = (self->output_len + n) / 2 * 3;
1075         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1076             return -1;
1077     }
1078     buffer = PyBytes_AS_STRING(self->output_buffer);
1079     if (need_new_frame) {
1080         /* Setup new frame */
1081         Py_ssize_t frame_start = self->output_len;
1082         self->frame_start = frame_start;
1083         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1084             /* Write an invalid value, for debugging */
1085             buffer[frame_start + i] = 0xFE;
1086         }
1087         self->output_len += FRAME_HEADER_SIZE;
1088     }
1089     if (data_len < 8) {
1090         /* This is faster than memcpy when the string is short. */
1091         for (i = 0; i < data_len; i++) {
1092             buffer[self->output_len + i] = s[i];
1093         }
1094     }
1095     else {
1096         memcpy(buffer + self->output_len, s, data_len);
1097     }
1098     self->output_len += data_len;
1099     return data_len;
1100 }
1101 
1102 static PicklerObject *
_Pickler_New(void)1103 _Pickler_New(void)
1104 {
1105     PicklerObject *self;
1106 
1107     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1108     if (self == NULL)
1109         return NULL;
1110 
1111     self->pers_func = NULL;
1112     self->dispatch_table = NULL;
1113     self->buffer_callback = NULL;
1114     self->write = NULL;
1115     self->proto = 0;
1116     self->bin = 0;
1117     self->framing = 0;
1118     self->frame_start = -1;
1119     self->fast = 0;
1120     self->fast_nesting = 0;
1121     self->fix_imports = 0;
1122     self->fast_memo = NULL;
1123     self->max_output_len = WRITE_BUF_SIZE;
1124     self->output_len = 0;
1125     self->reducer_override = NULL;
1126 
1127     self->memo = PyMemoTable_New();
1128     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1129                                                     self->max_output_len);
1130 
1131     if (self->memo == NULL || self->output_buffer == NULL) {
1132         Py_DECREF(self);
1133         return NULL;
1134     }
1135 
1136     PyObject_GC_Track(self);
1137     return self;
1138 }
1139 
1140 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1141 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1142 {
1143     long proto;
1144 
1145     if (protocol == Py_None) {
1146         proto = DEFAULT_PROTOCOL;
1147     }
1148     else {
1149         proto = PyLong_AsLong(protocol);
1150         if (proto < 0) {
1151             if (proto == -1 && PyErr_Occurred())
1152                 return -1;
1153             proto = HIGHEST_PROTOCOL;
1154         }
1155         else if (proto > HIGHEST_PROTOCOL) {
1156             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1157                          HIGHEST_PROTOCOL);
1158             return -1;
1159         }
1160     }
1161     self->proto = (int)proto;
1162     self->bin = proto > 0;
1163     self->fix_imports = fix_imports && proto < 3;
1164     return 0;
1165 }
1166 
1167 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1168    be called once on a freshly created Pickler. */
1169 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1170 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1171 {
1172     _Py_IDENTIFIER(write);
1173     assert(file != NULL);
1174     if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1175         return -1;
1176     }
1177     if (self->write == NULL) {
1178         PyErr_SetString(PyExc_TypeError,
1179                         "file must have a 'write' attribute");
1180         return -1;
1181     }
1182 
1183     return 0;
1184 }
1185 
1186 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1187 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1188 {
1189     if (buffer_callback == Py_None) {
1190         buffer_callback = NULL;
1191     }
1192     if (buffer_callback != NULL && self->proto < 5) {
1193         PyErr_SetString(PyExc_ValueError,
1194                         "buffer_callback needs protocol >= 5");
1195         return -1;
1196     }
1197 
1198     Py_XINCREF(buffer_callback);
1199     self->buffer_callback = buffer_callback;
1200     return 0;
1201 }
1202 
1203 /* Returns the size of the input on success, -1 on failure. This takes its
1204    own reference to `input`. */
1205 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1206 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1207 {
1208     if (self->buffer.buf != NULL)
1209         PyBuffer_Release(&self->buffer);
1210     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1211         return -1;
1212     self->input_buffer = self->buffer.buf;
1213     self->input_len = self->buffer.len;
1214     self->next_read_idx = 0;
1215     self->prefetched_idx = self->input_len;
1216     return self->input_len;
1217 }
1218 
1219 static int
bad_readline(void)1220 bad_readline(void)
1221 {
1222     PickleState *st = _Pickle_GetGlobalState();
1223     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1224     return -1;
1225 }
1226 
1227 /* Skip any consumed data that was only prefetched using peek() */
1228 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1229 _Unpickler_SkipConsumed(UnpicklerObject *self)
1230 {
1231     Py_ssize_t consumed;
1232     PyObject *r;
1233 
1234     consumed = self->next_read_idx - self->prefetched_idx;
1235     if (consumed <= 0)
1236         return 0;
1237 
1238     assert(self->peek);  /* otherwise we did something wrong */
1239     /* This makes a useless copy... */
1240     r = PyObject_CallFunction(self->read, "n", consumed);
1241     if (r == NULL)
1242         return -1;
1243     Py_DECREF(r);
1244 
1245     self->prefetched_idx = self->next_read_idx;
1246     return 0;
1247 }
1248 
/* Sentinel size understood by _Unpickler_ReadFromFile(): read a whole line
   instead of a given number of bytes. */
static const Py_ssize_t READ_WHOLE_LINE = -1;
1250 
1251 /* If reading from a file, we need to only pull the bytes we need, since there
1252    may be multiple pickle objects arranged contiguously in the same input
1253    buffer.
1254 
1255    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1256    bytes from the input stream/buffer.
1257 
1258    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1259    failure; on success, returns the number of bytes read from the file.
1260 
1261    On success, self->input_len will be 0; this is intentional so that when
1262    unpickling from a file, the "we've run out of data" code paths will trigger,
1263    causing the Unpickler to go back to the file for more data. Use the returned
1264    size to tell you how much data you can process. */
1265 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1266 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1267 {
1268     PyObject *data;
1269     Py_ssize_t read_size;
1270 
1271     assert(self->read != NULL);
1272 
1273     if (_Unpickler_SkipConsumed(self) < 0)
1274         return -1;
1275 
1276     if (n == READ_WHOLE_LINE) {
1277         data = _PyObject_CallNoArg(self->readline);
1278     }
1279     else {
1280         PyObject *len;
1281         /* Prefetch some data without advancing the file pointer, if possible */
1282         if (self->peek && n < PREFETCH) {
1283             len = PyLong_FromSsize_t(PREFETCH);
1284             if (len == NULL)
1285                 return -1;
1286             data = _Pickle_FastCall(self->peek, len);
1287             if (data == NULL) {
1288                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1289                     return -1;
1290                 /* peek() is probably not supported by the given file object */
1291                 PyErr_Clear();
1292                 Py_CLEAR(self->peek);
1293             }
1294             else {
1295                 read_size = _Unpickler_SetStringInput(self, data);
1296                 Py_DECREF(data);
1297                 self->prefetched_idx = 0;
1298                 if (n <= read_size)
1299                     return n;
1300             }
1301         }
1302         len = PyLong_FromSsize_t(n);
1303         if (len == NULL)
1304             return -1;
1305         data = _Pickle_FastCall(self->read, len);
1306     }
1307     if (data == NULL)
1308         return -1;
1309 
1310     read_size = _Unpickler_SetStringInput(self, data);
1311     Py_DECREF(data);
1312     return read_size;
1313 }
1314 
1315 /* Don't call it directly: use _Unpickler_Read() */
1316 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1317 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1318 {
1319     Py_ssize_t num_read;
1320 
1321     *s = NULL;
1322     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1323         PickleState *st = _Pickle_GetGlobalState();
1324         PyErr_SetString(st->UnpicklingError,
1325                         "read would overflow (invalid bytecode)");
1326         return -1;
1327     }
1328 
1329     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1330     assert(self->next_read_idx + n > self->input_len);
1331 
1332     if (!self->read)
1333         return bad_readline();
1334 
1335     /* Extend the buffer to satisfy desired size */
1336     num_read = _Unpickler_ReadFromFile(self, n);
1337     if (num_read < 0)
1338         return -1;
1339     if (num_read < n)
1340         return bad_readline();
1341     *s = self->input_buffer;
1342     self->next_read_idx = n;
1343     return n;
1344 }
1345 
1346 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1347  *
1348  * This should only be used for non-small data reads where potentially
1349  * avoiding a copy is beneficial.  This method does not try to prefetch
1350  * more data into the input buffer.
1351  *
1352  * _Unpickler_Read() is recommended in most cases.
1353  */
1354 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1355 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1356 {
1357     assert(n != READ_WHOLE_LINE);
1358 
1359     /* Read from available buffer data, if any */
1360     Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1361     if (in_buffer > 0) {
1362         Py_ssize_t to_read = Py_MIN(in_buffer, n);
1363         memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1364         self->next_read_idx += to_read;
1365         buf += to_read;
1366         n -= to_read;
1367         if (n == 0) {
1368             /* Entire read was satisfied from buffer */
1369             return n;
1370         }
1371     }
1372 
1373     /* Read from file */
1374     if (!self->read) {
1375         /* We're unpickling memory, this means the input is truncated */
1376         return bad_readline();
1377     }
1378     if (_Unpickler_SkipConsumed(self) < 0) {
1379         return -1;
1380     }
1381 
1382     if (!self->readinto) {
1383         /* readinto() not supported on file-like object, fall back to read()
1384          * and copy into destination buffer (bpo-39681) */
1385         PyObject* len = PyLong_FromSsize_t(n);
1386         if (len == NULL) {
1387             return -1;
1388         }
1389         PyObject* data = _Pickle_FastCall(self->read, len);
1390         if (data == NULL) {
1391             return -1;
1392         }
1393         if (!PyBytes_Check(data)) {
1394             PyErr_Format(PyExc_ValueError,
1395                          "read() returned non-bytes object (%R)",
1396                          Py_TYPE(data));
1397             Py_DECREF(data);
1398             return -1;
1399         }
1400         Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1401         if (read_size < n) {
1402             Py_DECREF(data);
1403             return bad_readline();
1404         }
1405         memcpy(buf, PyBytes_AS_STRING(data), n);
1406         Py_DECREF(data);
1407         return n;
1408     }
1409 
1410     /* Call readinto() into user buffer */
1411     PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1412     if (buf_obj == NULL) {
1413         return -1;
1414     }
1415     PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1416     if (read_size_obj == NULL) {
1417         return -1;
1418     }
1419     Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1420     Py_DECREF(read_size_obj);
1421 
1422     if (read_size < 0) {
1423         if (!PyErr_Occurred()) {
1424             PyErr_SetString(PyExc_ValueError,
1425                             "readinto() returned negative size");
1426         }
1427         return -1;
1428     }
1429     if (read_size < n) {
1430         return bad_readline();
1431     }
1432     return n;
1433 }
1434 
/* Read `n` bytes from the unpickler's data source, storing a pointer to them
   in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when at least `n` unread bytes are already in the input buffer,
   `*s` is pointed at them and self->next_read_idx is advanced by `n`.
   Otherwise the request is handed to _Unpickler_ReadImpl().

   This is a macro: `self` and `n` are evaluated more than once, so pass
   expressions without side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1454 
1455 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1456 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1457                     char **result)
1458 {
1459     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1460     if (input_line == NULL) {
1461         PyErr_NoMemory();
1462         return -1;
1463     }
1464 
1465     memcpy(input_line, line, len);
1466     input_line[len] = '\0';
1467     self->input_line = input_line;
1468     *result = self->input_line;
1469     return len;
1470 }
1471 
1472 /* Read a line from the input stream/buffer. If we run off the end of the input
1473    before hitting \n, raise an error.
1474 
1475    Returns the number of chars read, or -1 on failure. */
1476 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1477 _Unpickler_Readline(UnpicklerObject *self, char **result)
1478 {
1479     Py_ssize_t i, num_read;
1480 
1481     for (i = self->next_read_idx; i < self->input_len; i++) {
1482         if (self->input_buffer[i] == '\n') {
1483             char *line_start = self->input_buffer + self->next_read_idx;
1484             num_read = i - self->next_read_idx + 1;
1485             self->next_read_idx = i + 1;
1486             return _Unpickler_CopyLine(self, line_start, num_read, result);
1487         }
1488     }
1489     if (!self->read)
1490         return bad_readline();
1491 
1492     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1493     if (num_read < 0)
1494         return -1;
1495     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1496         return bad_readline();
1497     self->next_read_idx = num_read;
1498     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1499 }
1500 
1501 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1502    will be modified in place. */
1503 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1504 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1505 {
1506     size_t i;
1507 
1508     assert(new_size > self->memo_size);
1509 
1510     PyObject **memo_new = self->memo;
1511     PyMem_RESIZE(memo_new, PyObject *, new_size);
1512     if (memo_new == NULL) {
1513         PyErr_NoMemory();
1514         return -1;
1515     }
1516     self->memo = memo_new;
1517     for (i = self->memo_size; i < new_size; i++)
1518         self->memo[i] = NULL;
1519     self->memo_size = new_size;
1520     return 0;
1521 }
1522 
1523 /* Returns NULL if idx is out of bounds. */
1524 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1525 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1526 {
1527     if (idx >= self->memo_size)
1528         return NULL;
1529 
1530     return self->memo[idx];
1531 }
1532 
1533 /* Returns -1 (with an exception set) on failure, 0 on success.
1534    This takes its own reference to `value`. */
1535 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1536 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1537 {
1538     PyObject *old_item;
1539 
1540     if (idx >= self->memo_size) {
1541         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1542             return -1;
1543         assert(idx < self->memo_size);
1544     }
1545     Py_INCREF(value);
1546     old_item = self->memo[idx];
1547     self->memo[idx] = value;
1548     if (old_item != NULL) {
1549         Py_DECREF(old_item);
1550     }
1551     else {
1552         self->memo_len++;
1553     }
1554     return 0;
1555 }
1556 
1557 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1558 _Unpickler_NewMemo(Py_ssize_t new_size)
1559 {
1560     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1561     if (memo == NULL) {
1562         PyErr_NoMemory();
1563         return NULL;
1564     }
1565     memset(memo, 0, new_size * sizeof(PyObject *));
1566     return memo;
1567 }
1568 
1569 /* Free the unpickler's memo, taking care to decref any items left in it. */
1570 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1571 _Unpickler_MemoCleanup(UnpicklerObject *self)
1572 {
1573     Py_ssize_t i;
1574     PyObject **memo = self->memo;
1575 
1576     if (self->memo == NULL)
1577         return;
1578     self->memo = NULL;
1579     i = self->memo_size;
1580     while (--i >= 0) {
1581         Py_XDECREF(memo[i]);
1582     }
1583     PyMem_FREE(memo);
1584 }
1585 
1586 static UnpicklerObject *
_Unpickler_New(void)1587 _Unpickler_New(void)
1588 {
1589     UnpicklerObject *self;
1590 
1591     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1592     if (self == NULL)
1593         return NULL;
1594 
1595     self->pers_func = NULL;
1596     self->input_buffer = NULL;
1597     self->input_line = NULL;
1598     self->input_len = 0;
1599     self->next_read_idx = 0;
1600     self->prefetched_idx = 0;
1601     self->read = NULL;
1602     self->readinto = NULL;
1603     self->readline = NULL;
1604     self->peek = NULL;
1605     self->buffers = NULL;
1606     self->encoding = NULL;
1607     self->errors = NULL;
1608     self->marks = NULL;
1609     self->num_marks = 0;
1610     self->marks_size = 0;
1611     self->proto = 0;
1612     self->fix_imports = 0;
1613     memset(&self->buffer, 0, sizeof(Py_buffer));
1614     self->memo_size = 32;
1615     self->memo_len = 0;
1616     self->memo = _Unpickler_NewMemo(self->memo_size);
1617     self->stack = (Pdata *)Pdata_New();
1618 
1619     if (self->memo == NULL || self->stack == NULL) {
1620         Py_DECREF(self);
1621         return NULL;
1622     }
1623 
1624     PyObject_GC_Track(self);
1625     return self;
1626 }
1627 
1628 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1629    be called once on a freshly created Unpickler. */
1630 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1631 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1632 {
1633     _Py_IDENTIFIER(peek);
1634     _Py_IDENTIFIER(read);
1635     _Py_IDENTIFIER(readinto);
1636     _Py_IDENTIFIER(readline);
1637 
1638     /* Optional file methods */
1639     if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1640         return -1;
1641     }
1642     if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1643         return -1;
1644     }
1645     (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1646     (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1647     if (!self->readline || !self->read) {
1648         if (!PyErr_Occurred()) {
1649             PyErr_SetString(PyExc_TypeError,
1650                             "file must have 'read' and 'readline' attributes");
1651         }
1652         Py_CLEAR(self->read);
1653         Py_CLEAR(self->readinto);
1654         Py_CLEAR(self->readline);
1655         Py_CLEAR(self->peek);
1656         return -1;
1657     }
1658     return 0;
1659 }
1660 
1661 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1662    be called once on a freshly created Unpickler. */
1663 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1664 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1665                             const char *encoding,
1666                             const char *errors)
1667 {
1668     if (encoding == NULL)
1669         encoding = "ASCII";
1670     if (errors == NULL)
1671         errors = "strict";
1672 
1673     self->encoding = _PyMem_Strdup(encoding);
1674     self->errors = _PyMem_Strdup(errors);
1675     if (self->encoding == NULL || self->errors == NULL) {
1676         PyErr_NoMemory();
1677         return -1;
1678     }
1679     return 0;
1680 }
1681 
1682 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1683    be called once on a freshly created Unpickler. */
1684 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1685 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1686 {
1687     if (buffers == NULL || buffers == Py_None) {
1688         self->buffers = NULL;
1689     }
1690     else {
1691         self->buffers = PyObject_GetIter(buffers);
1692         if (self->buffers == NULL) {
1693             return -1;
1694         }
1695     }
1696     return 0;
1697 }
1698 
1699 /* Generate a GET opcode for an object stored in the memo. */
1700 static int
memo_get(PicklerObject * self,PyObject * key)1701 memo_get(PicklerObject *self, PyObject *key)
1702 {
1703     Py_ssize_t *value;
1704     char pdata[30];
1705     Py_ssize_t len;
1706 
1707     value = PyMemoTable_Get(self->memo, key);
1708     if (value == NULL)  {
1709         PyErr_SetObject(PyExc_KeyError, key);
1710         return -1;
1711     }
1712 
1713     if (!self->bin) {
1714         pdata[0] = GET;
1715         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1716                       "%" PY_FORMAT_SIZE_T "d\n", *value);
1717         len = strlen(pdata);
1718     }
1719     else {
1720         if (*value < 256) {
1721             pdata[0] = BINGET;
1722             pdata[1] = (unsigned char)(*value & 0xff);
1723             len = 2;
1724         }
1725         else if ((size_t)*value <= 0xffffffffUL) {
1726             pdata[0] = LONG_BINGET;
1727             pdata[1] = (unsigned char)(*value & 0xff);
1728             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1729             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1730             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1731             len = 5;
1732         }
1733         else { /* unlikely */
1734             PickleState *st = _Pickle_GetGlobalState();
1735             PyErr_SetString(st->PicklingError,
1736                             "memo id too large for LONG_BINGET");
1737             return -1;
1738         }
1739     }
1740 
1741     if (_Pickler_Write(self, pdata, len) < 0)
1742         return -1;
1743 
1744     return 0;
1745 }
1746 
1747 /* Store an object in the memo, assign it a new unique ID based on the number
1748    of objects currently stored in the memo and generate a PUT opcode. */
1749 static int
memo_put(PicklerObject * self,PyObject * obj)1750 memo_put(PicklerObject *self, PyObject *obj)
1751 {
1752     char pdata[30];
1753     Py_ssize_t len;
1754     Py_ssize_t idx;
1755 
1756     const char memoize_op = MEMOIZE;
1757 
1758     if (self->fast)
1759         return 0;
1760 
1761     idx = PyMemoTable_Size(self->memo);
1762     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1763         return -1;
1764 
1765     if (self->proto >= 4) {
1766         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1767             return -1;
1768         return 0;
1769     }
1770     else if (!self->bin) {
1771         pdata[0] = PUT;
1772         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1773                       "%" PY_FORMAT_SIZE_T "d\n", idx);
1774         len = strlen(pdata);
1775     }
1776     else {
1777         if (idx < 256) {
1778             pdata[0] = BINPUT;
1779             pdata[1] = (unsigned char)idx;
1780             len = 2;
1781         }
1782         else if ((size_t)idx <= 0xffffffffUL) {
1783             pdata[0] = LONG_BINPUT;
1784             pdata[1] = (unsigned char)(idx & 0xff);
1785             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1786             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1787             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1788             len = 5;
1789         }
1790         else { /* unlikely */
1791             PickleState *st = _Pickle_GetGlobalState();
1792             PyErr_SetString(st->PicklingError,
1793                             "memo id too large for LONG_BINPUT");
1794             return -1;
1795         }
1796     }
1797     if (_Pickler_Write(self, pdata, len) < 0)
1798         return -1;
1799 
1800     return 0;
1801 }
1802 
1803 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1804 get_dotted_path(PyObject *obj, PyObject *name)
1805 {
1806     _Py_static_string(PyId_dot, ".");
1807     PyObject *dotted_path;
1808     Py_ssize_t i, n;
1809 
1810     dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1811     if (dotted_path == NULL)
1812         return NULL;
1813     n = PyList_GET_SIZE(dotted_path);
1814     assert(n >= 1);
1815     for (i = 0; i < n; i++) {
1816         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1817         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1818             if (obj == NULL)
1819                 PyErr_Format(PyExc_AttributeError,
1820                              "Can't pickle local object %R", name);
1821             else
1822                 PyErr_Format(PyExc_AttributeError,
1823                              "Can't pickle local attribute %R on %R", name, obj);
1824             Py_DECREF(dotted_path);
1825             return NULL;
1826         }
1827     }
1828     return dotted_path;
1829 }
1830 
1831 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1832 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1833 {
1834     Py_ssize_t i, n;
1835     PyObject *parent = NULL;
1836 
1837     assert(PyList_CheckExact(names));
1838     Py_INCREF(obj);
1839     n = PyList_GET_SIZE(names);
1840     for (i = 0; i < n; i++) {
1841         PyObject *name = PyList_GET_ITEM(names, i);
1842         Py_XDECREF(parent);
1843         parent = obj;
1844         (void)_PyObject_LookupAttr(parent, name, &obj);
1845         if (obj == NULL) {
1846             Py_DECREF(parent);
1847             return NULL;
1848         }
1849     }
1850     if (pparent != NULL)
1851         *pparent = parent;
1852     else
1853         Py_XDECREF(parent);
1854     return obj;
1855 }
1856 
1857 
1858 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1859 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1860 {
1861     PyObject *dotted_path, *attr;
1862 
1863     if (allow_qualname) {
1864         dotted_path = get_dotted_path(obj, name);
1865         if (dotted_path == NULL)
1866             return NULL;
1867         attr = get_deep_attribute(obj, dotted_path, NULL);
1868         Py_DECREF(dotted_path);
1869     }
1870     else {
1871         (void)_PyObject_LookupAttr(obj, name, &attr);
1872     }
1873     if (attr == NULL && !PyErr_Occurred()) {
1874         PyErr_Format(PyExc_AttributeError,
1875                      "Can't get attribute %R on %R", name, obj);
1876     }
1877     return attr;
1878 }
1879 
1880 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1881 _checkmodule(PyObject *module_name, PyObject *module,
1882              PyObject *global, PyObject *dotted_path)
1883 {
1884     if (module == Py_None) {
1885         return -1;
1886     }
1887     if (PyUnicode_Check(module_name) &&
1888             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1889         return -1;
1890     }
1891 
1892     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1893     if (candidate == NULL) {
1894         return -1;
1895     }
1896     if (candidate != global) {
1897         Py_DECREF(candidate);
1898         return -1;
1899     }
1900     Py_DECREF(candidate);
1901     return 0;
1902 }
1903 
1904 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1905 whichmodule(PyObject *global, PyObject *dotted_path)
1906 {
1907     PyObject *module_name;
1908     PyObject *module = NULL;
1909     Py_ssize_t i;
1910     PyObject *modules;
1911     _Py_IDENTIFIER(__module__);
1912     _Py_IDENTIFIER(modules);
1913     _Py_IDENTIFIER(__main__);
1914 
1915     if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1916         return NULL;
1917     }
1918     if (module_name) {
1919         /* In some rare cases (e.g., bound methods of extension types),
1920            __module__ can be None. If it is so, then search sys.modules for
1921            the module of global. */
1922         if (module_name != Py_None)
1923             return module_name;
1924         Py_CLEAR(module_name);
1925     }
1926     assert(module_name == NULL);
1927 
1928     /* Fallback on walking sys.modules */
1929     modules = _PySys_GetObjectId(&PyId_modules);
1930     if (modules == NULL) {
1931         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1932         return NULL;
1933     }
1934     if (PyDict_CheckExact(modules)) {
1935         i = 0;
1936         while (PyDict_Next(modules, &i, &module_name, &module)) {
1937             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1938                 Py_INCREF(module_name);
1939                 return module_name;
1940             }
1941             if (PyErr_Occurred()) {
1942                 return NULL;
1943             }
1944         }
1945     }
1946     else {
1947         PyObject *iterator = PyObject_GetIter(modules);
1948         if (iterator == NULL) {
1949             return NULL;
1950         }
1951         while ((module_name = PyIter_Next(iterator))) {
1952             module = PyObject_GetItem(modules, module_name);
1953             if (module == NULL) {
1954                 Py_DECREF(module_name);
1955                 Py_DECREF(iterator);
1956                 return NULL;
1957             }
1958             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1959                 Py_DECREF(module);
1960                 Py_DECREF(iterator);
1961                 return module_name;
1962             }
1963             Py_DECREF(module);
1964             Py_DECREF(module_name);
1965             if (PyErr_Occurred()) {
1966                 Py_DECREF(iterator);
1967                 return NULL;
1968             }
1969         }
1970         Py_DECREF(iterator);
1971     }
1972 
1973     /* If no module is found, use __main__. */
1974     module_name = _PyUnicode_FromId(&PyId___main__);
1975     Py_XINCREF(module_name);
1976     return module_name;
1977 }
1978 
1979 /* fast_save_enter() and fast_save_leave() are guards against recursive
1980    objects when Pickler is used with the "fast mode" (i.e., with object
1981    memoization disabled). If the nesting of a list or dict object exceed
1982    FAST_NESTING_LIMIT, these guards will start keeping an internal
1983    reference to the seen list or dict objects and check whether these objects
1984    are recursive. These are not strictly necessary, since save() has a
1985    hard-coded recursion limit, but they give a nicer error message than the
1986    typical RuntimeError. */
1987 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1988 fast_save_enter(PicklerObject *self, PyObject *obj)
1989 {
1990     /* if fast_nesting < 0, we're doing an error exit. */
1991     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1992         PyObject *key = NULL;
1993         if (self->fast_memo == NULL) {
1994             self->fast_memo = PyDict_New();
1995             if (self->fast_memo == NULL) {
1996                 self->fast_nesting = -1;
1997                 return 0;
1998             }
1999         }
2000         key = PyLong_FromVoidPtr(obj);
2001         if (key == NULL) {
2002             self->fast_nesting = -1;
2003             return 0;
2004         }
2005         if (PyDict_GetItemWithError(self->fast_memo, key)) {
2006             Py_DECREF(key);
2007             PyErr_Format(PyExc_ValueError,
2008                          "fast mode: can't pickle cyclic objects "
2009                          "including object type %.200s at %p",
2010                          obj->ob_type->tp_name, obj);
2011             self->fast_nesting = -1;
2012             return 0;
2013         }
2014         if (PyErr_Occurred()) {
2015             Py_DECREF(key);
2016             self->fast_nesting = -1;
2017             return 0;
2018         }
2019         if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
2020             Py_DECREF(key);
2021             self->fast_nesting = -1;
2022             return 0;
2023         }
2024         Py_DECREF(key);
2025     }
2026     return 1;
2027 }
2028 
2029 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2030 fast_save_leave(PicklerObject *self, PyObject *obj)
2031 {
2032     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2033         PyObject *key = PyLong_FromVoidPtr(obj);
2034         if (key == NULL)
2035             return 0;
2036         if (PyDict_DelItem(self->fast_memo, key) < 0) {
2037             Py_DECREF(key);
2038             return 0;
2039         }
2040         Py_DECREF(key);
2041     }
2042     return 1;
2043 }
2044 
2045 static int
save_none(PicklerObject * self,PyObject * obj)2046 save_none(PicklerObject *self, PyObject *obj)
2047 {
2048     const char none_op = NONE;
2049     if (_Pickler_Write(self, &none_op, 1) < 0)
2050         return -1;
2051 
2052     return 0;
2053 }
2054 
2055 static int
save_bool(PicklerObject * self,PyObject * obj)2056 save_bool(PicklerObject *self, PyObject *obj)
2057 {
2058     if (self->proto >= 2) {
2059         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2060         if (_Pickler_Write(self, &bool_op, 1) < 0)
2061             return -1;
2062     }
2063     else {
2064         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2065          * so that unpicklers written before bools were introduced unpickle them
2066          * as ints, but unpicklers after can recognize that bools were intended.
2067          * Note that protocol 2 added direct ways to pickle bools.
2068          */
2069         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2070         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2071             return -1;
2072     }
2073     return 0;
2074 }
2075 
2076 static int
save_long(PicklerObject * self,PyObject * obj)2077 save_long(PicklerObject *self, PyObject *obj)
2078 {
2079     PyObject *repr = NULL;
2080     Py_ssize_t size;
2081     long val;
2082     int overflow;
2083     int status = 0;
2084 
2085     val= PyLong_AsLongAndOverflow(obj, &overflow);
2086     if (!overflow && (sizeof(long) <= 4 ||
2087             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2088     {
2089         /* result fits in a signed 4-byte integer.
2090 
2091            Note: we can't use -0x80000000L in the above condition because some
2092            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2093            before applying the unary minus when sizeof(long) <= 4. The
2094            resulting value stays unsigned which is commonly not what we want,
2095            so MSVC happily warns us about it.  However, that result would have
2096            been fine because we guard for sizeof(long) <= 4 which turns the
2097            condition true in that particular case. */
2098         char pdata[32];
2099         Py_ssize_t len = 0;
2100 
2101         if (self->bin) {
2102             pdata[1] = (unsigned char)(val & 0xff);
2103             pdata[2] = (unsigned char)((val >> 8) & 0xff);
2104             pdata[3] = (unsigned char)((val >> 16) & 0xff);
2105             pdata[4] = (unsigned char)((val >> 24) & 0xff);
2106 
2107             if ((pdata[4] != 0) || (pdata[3] != 0)) {
2108                 pdata[0] = BININT;
2109                 len = 5;
2110             }
2111             else if (pdata[2] != 0) {
2112                 pdata[0] = BININT2;
2113                 len = 3;
2114             }
2115             else {
2116                 pdata[0] = BININT1;
2117                 len = 2;
2118             }
2119         }
2120         else {
2121             sprintf(pdata, "%c%ld\n", INT,  val);
2122             len = strlen(pdata);
2123         }
2124         if (_Pickler_Write(self, pdata, len) < 0)
2125             return -1;
2126 
2127         return 0;
2128     }
2129     assert(!PyErr_Occurred());
2130 
2131     if (self->proto >= 2) {
2132         /* Linear-time pickling. */
2133         size_t nbits;
2134         size_t nbytes;
2135         unsigned char *pdata;
2136         char header[5];
2137         int i;
2138         int sign = _PyLong_Sign(obj);
2139 
2140         if (sign == 0) {
2141             header[0] = LONG1;
2142             header[1] = 0;      /* It's 0 -- an empty bytestring. */
2143             if (_Pickler_Write(self, header, 2) < 0)
2144                 goto error;
2145             return 0;
2146         }
2147         nbits = _PyLong_NumBits(obj);
2148         if (nbits == (size_t)-1 && PyErr_Occurred())
2149             goto error;
2150         /* How many bytes do we need?  There are nbits >> 3 full
2151          * bytes of data, and nbits & 7 leftover bits.  If there
2152          * are any leftover bits, then we clearly need another
2153          * byte.  What's not so obvious is that we *probably*
2154          * need another byte even if there aren't any leftovers:
2155          * the most-significant bit of the most-significant byte
2156          * acts like a sign bit, and it's usually got a sense
2157          * opposite of the one we need.  The exception is ints
2158          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2159          * its own 256's-complement, so has the right sign bit
2160          * even without the extra byte.  That's a pain to check
2161          * for in advance, though, so we always grab an extra
2162          * byte at the start, and cut it back later if possible.
2163          */
2164         nbytes = (nbits >> 3) + 1;
2165         if (nbytes > 0x7fffffffL) {
2166             PyErr_SetString(PyExc_OverflowError,
2167                             "int too large to pickle");
2168             goto error;
2169         }
2170         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2171         if (repr == NULL)
2172             goto error;
2173         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2174         i = _PyLong_AsByteArray((PyLongObject *)obj,
2175                                 pdata, nbytes,
2176                                 1 /* little endian */ , 1 /* signed */ );
2177         if (i < 0)
2178             goto error;
2179         /* If the int is negative, this may be a byte more than
2180          * needed.  This is so iff the MSB is all redundant sign
2181          * bits.
2182          */
2183         if (sign < 0 &&
2184             nbytes > 1 &&
2185             pdata[nbytes - 1] == 0xff &&
2186             (pdata[nbytes - 2] & 0x80) != 0) {
2187             nbytes--;
2188         }
2189 
2190         if (nbytes < 256) {
2191             header[0] = LONG1;
2192             header[1] = (unsigned char)nbytes;
2193             size = 2;
2194         }
2195         else {
2196             header[0] = LONG4;
2197             size = (Py_ssize_t) nbytes;
2198             for (i = 1; i < 5; i++) {
2199                 header[i] = (unsigned char)(size & 0xff);
2200                 size >>= 8;
2201             }
2202             size = 5;
2203         }
2204         if (_Pickler_Write(self, header, size) < 0 ||
2205             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2206             goto error;
2207     }
2208     else {
2209         const char long_op = LONG;
2210         const char *string;
2211 
2212         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2213            the number of digits), in both directions.  We add a trailing 'L'
2214            to the repr, for compatibility with Python 2.x. */
2215 
2216         repr = PyObject_Repr(obj);
2217         if (repr == NULL)
2218             goto error;
2219 
2220         string = PyUnicode_AsUTF8AndSize(repr, &size);
2221         if (string == NULL)
2222             goto error;
2223 
2224         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2225             _Pickler_Write(self, string, size) < 0 ||
2226             _Pickler_Write(self, "L\n", 2) < 0)
2227             goto error;
2228     }
2229 
2230     if (0) {
2231   error:
2232       status = -1;
2233     }
2234     Py_XDECREF(repr);
2235 
2236     return status;
2237 }
2238 
2239 static int
save_float(PicklerObject * self,PyObject * obj)2240 save_float(PicklerObject *self, PyObject *obj)
2241 {
2242     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2243 
2244     if (self->bin) {
2245         char pdata[9];
2246         pdata[0] = BINFLOAT;
2247         if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2248             return -1;
2249         if (_Pickler_Write(self, pdata, 9) < 0)
2250             return -1;
2251    }
2252     else {
2253         int result = -1;
2254         char *buf = NULL;
2255         char op = FLOAT;
2256 
2257         if (_Pickler_Write(self, &op, 1) < 0)
2258             goto done;
2259 
2260         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2261         if (!buf) {
2262             PyErr_NoMemory();
2263             goto done;
2264         }
2265 
2266         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2267             goto done;
2268 
2269         if (_Pickler_Write(self, "\n", 1) < 0)
2270             goto done;
2271 
2272         result = 0;
2273 done:
2274         PyMem_Free(buf);
2275         return result;
2276     }
2277 
2278     return 0;
2279 }
2280 
2281 /* Perform direct write of the header and payload of the binary object.
2282 
2283    The large contiguous data is written directly into the underlying file
2284    object, bypassing the output_buffer of the Pickler.  We intentionally
2285    do not insert a protocol 4 frame opcode to make it possible to optimize
2286    file.read calls in the loader.
2287  */
2288 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2289 _Pickler_write_bytes(PicklerObject *self,
2290                      const char *header, Py_ssize_t header_size,
2291                      const char *data, Py_ssize_t data_size,
2292                      PyObject *payload)
2293 {
2294     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2295     int framing = self->framing;
2296 
2297     if (bypass_buffer) {
2298         assert(self->output_buffer != NULL);
2299         /* Commit the previous frame. */
2300         if (_Pickler_CommitFrame(self)) {
2301             return -1;
2302         }
2303         /* Disable framing temporarily */
2304         self->framing = 0;
2305     }
2306 
2307     if (_Pickler_Write(self, header, header_size) < 0) {
2308         return -1;
2309     }
2310 
2311     if (bypass_buffer && self->write != NULL) {
2312         /* Bypass the in-memory buffer to directly stream large data
2313            into the underlying file object. */
2314         PyObject *result, *mem = NULL;
2315         /* Dump the output buffer to the file. */
2316         if (_Pickler_FlushToFile(self) < 0) {
2317             return -1;
2318         }
2319 
2320         /* Stream write the payload into the file without going through the
2321            output buffer. */
2322         if (payload == NULL) {
2323             /* TODO: It would be better to use a memoryview with a linked
2324                original string if this is possible. */
2325             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2326             if (payload == NULL) {
2327                 return -1;
2328             }
2329         }
2330         result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2331         Py_XDECREF(mem);
2332         if (result == NULL) {
2333             return -1;
2334         }
2335         Py_DECREF(result);
2336 
2337         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2338         if (_Pickler_ClearBuffer(self) < 0) {
2339             return -1;
2340         }
2341     }
2342     else {
2343         if (_Pickler_Write(self, data, data_size) < 0) {
2344             return -1;
2345         }
2346     }
2347 
2348     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2349     self->framing = framing;
2350 
2351     return 0;
2352 }
2353 
2354 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2355 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2356                  Py_ssize_t size)
2357 {
2358     assert(self->proto >= 3);
2359 
2360     char header[9];
2361     Py_ssize_t len;
2362 
2363     if (size < 0)
2364         return -1;
2365 
2366     if (size <= 0xff) {
2367         header[0] = SHORT_BINBYTES;
2368         header[1] = (unsigned char)size;
2369         len = 2;
2370     }
2371     else if ((size_t)size <= 0xffffffffUL) {
2372         header[0] = BINBYTES;
2373         header[1] = (unsigned char)(size & 0xff);
2374         header[2] = (unsigned char)((size >> 8) & 0xff);
2375         header[3] = (unsigned char)((size >> 16) & 0xff);
2376         header[4] = (unsigned char)((size >> 24) & 0xff);
2377         len = 5;
2378     }
2379     else if (self->proto >= 4) {
2380         header[0] = BINBYTES8;
2381         _write_size64(header + 1, size);
2382         len = 9;
2383     }
2384     else {
2385         PyErr_SetString(PyExc_OverflowError,
2386                         "serializing a bytes object larger than 4 GiB "
2387                         "requires pickle protocol 4 or higher");
2388         return -1;
2389     }
2390 
2391     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2392         return -1;
2393     }
2394 
2395     if (memo_put(self, obj) < 0) {
2396         return -1;
2397     }
2398 
2399     return 0;
2400 }
2401 
2402 static int
save_bytes(PicklerObject * self,PyObject * obj)2403 save_bytes(PicklerObject *self, PyObject *obj)
2404 {
2405     if (self->proto < 3) {
2406         /* Older pickle protocols do not have an opcode for pickling bytes
2407            objects. Therefore, we need to fake the copy protocol (i.e.,
2408            the __reduce__ method) to permit bytes object unpickling.
2409 
2410            Here we use a hack to be compatible with Python 2. Since in Python
2411            2 'bytes' is just an alias for 'str' (which has different
2412            parameters than the actual bytes object), we use codecs.encode
2413            to create the appropriate 'str' object when unpickled using
2414            Python 2 *and* the appropriate 'bytes' object when unpickled
2415            using Python 3. Again this is a hack and we don't need to do this
2416            with newer protocols. */
2417         PyObject *reduce_value;
2418         int status;
2419 
2420         if (PyBytes_GET_SIZE(obj) == 0) {
2421             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2422         }
2423         else {
2424             PickleState *st = _Pickle_GetGlobalState();
2425             PyObject *unicode_str =
2426                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2427                                        PyBytes_GET_SIZE(obj),
2428                                        "strict");
2429             _Py_IDENTIFIER(latin1);
2430 
2431             if (unicode_str == NULL)
2432                 return -1;
2433             reduce_value = Py_BuildValue("(O(OO))",
2434                                          st->codecs_encode, unicode_str,
2435                                          _PyUnicode_FromId(&PyId_latin1));
2436             Py_DECREF(unicode_str);
2437         }
2438 
2439         if (reduce_value == NULL)
2440             return -1;
2441 
2442         /* save_reduce() will memoize the object automatically. */
2443         status = save_reduce(self, reduce_value, obj);
2444         Py_DECREF(reduce_value);
2445         return status;
2446     }
2447     else {
2448         return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2449                                 PyBytes_GET_SIZE(obj));
2450     }
2451 }
2452 
2453 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2454 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2455                      Py_ssize_t size)
2456 {
2457     assert(self->proto >= 5);
2458 
2459     char header[9];
2460     Py_ssize_t len;
2461 
2462     if (size < 0)
2463         return -1;
2464 
2465     header[0] = BYTEARRAY8;
2466     _write_size64(header + 1, size);
2467     len = 9;
2468 
2469     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2470         return -1;
2471     }
2472 
2473     if (memo_put(self, obj) < 0) {
2474         return -1;
2475     }
2476 
2477     return 0;
2478 }
2479 
2480 static int
save_bytearray(PicklerObject * self,PyObject * obj)2481 save_bytearray(PicklerObject *self, PyObject *obj)
2482 {
2483     if (self->proto < 5) {
2484         /* Older pickle protocols do not have an opcode for pickling
2485          * bytearrays. */
2486         PyObject *reduce_value = NULL;
2487         int status;
2488 
2489         if (PyByteArray_GET_SIZE(obj) == 0) {
2490             reduce_value = Py_BuildValue("(O())",
2491                                          (PyObject *) &PyByteArray_Type);
2492         }
2493         else {
2494             PyObject *bytes_obj = PyBytes_FromObject(obj);
2495             if (bytes_obj != NULL) {
2496                 reduce_value = Py_BuildValue("(O(O))",
2497                                              (PyObject *) &PyByteArray_Type,
2498                                              bytes_obj);
2499                 Py_DECREF(bytes_obj);
2500             }
2501         }
2502         if (reduce_value == NULL)
2503             return -1;
2504 
2505         /* save_reduce() will memoize the object automatically. */
2506         status = save_reduce(self, reduce_value, obj);
2507         Py_DECREF(reduce_value);
2508         return status;
2509     }
2510     else {
2511         return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2512                                     PyByteArray_GET_SIZE(obj));
2513     }
2514 }
2515 
2516 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2517 save_picklebuffer(PicklerObject *self, PyObject *obj)
2518 {
2519     if (self->proto < 5) {
2520         PickleState *st = _Pickle_GetGlobalState();
2521         PyErr_SetString(st->PicklingError,
2522                         "PickleBuffer can only pickled with protocol >= 5");
2523         return -1;
2524     }
2525     const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2526     if (view == NULL) {
2527         return -1;
2528     }
2529     if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2530         PickleState *st = _Pickle_GetGlobalState();
2531         PyErr_SetString(st->PicklingError,
2532                         "PickleBuffer can not be pickled when "
2533                         "pointing to a non-contiguous buffer");
2534         return -1;
2535     }
2536     int in_band = 1;
2537     if (self->buffer_callback != NULL) {
2538         PyObject *ret = PyObject_CallFunctionObjArgs(self->buffer_callback,
2539                                                      obj, NULL);
2540         if (ret == NULL) {
2541             return -1;
2542         }
2543         in_band = PyObject_IsTrue(ret);
2544         Py_DECREF(ret);
2545         if (in_band == -1) {
2546             return -1;
2547         }
2548     }
2549     if (in_band) {
2550         /* Write data in-band */
2551         if (view->readonly) {
2552             return _save_bytes_data(self, obj, (const char*) view->buf,
2553                                     view->len);
2554         }
2555         else {
2556             return _save_bytearray_data(self, obj, (const char*) view->buf,
2557                                         view->len);
2558         }
2559     }
2560     else {
2561         /* Write data out-of-band */
2562         const char next_buffer_op = NEXT_BUFFER;
2563         if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2564             return -1;
2565         }
2566         if (view->readonly) {
2567             const char readonly_buffer_op = READONLY_BUFFER;
2568             if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2569                 return -1;
2570             }
2571         }
2572     }
2573     return 0;
2574 }
2575 
2576 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2577    backslash and newline characters to \uXXXX escapes. */
2578 static PyObject *
raw_unicode_escape(PyObject * obj)2579 raw_unicode_escape(PyObject *obj)
2580 {
2581     char *p;
2582     Py_ssize_t i, size;
2583     void *data;
2584     unsigned int kind;
2585     _PyBytesWriter writer;
2586 
2587     if (PyUnicode_READY(obj))
2588         return NULL;
2589 
2590     _PyBytesWriter_Init(&writer);
2591 
2592     size = PyUnicode_GET_LENGTH(obj);
2593     data = PyUnicode_DATA(obj);
2594     kind = PyUnicode_KIND(obj);
2595 
2596     p = _PyBytesWriter_Alloc(&writer, size);
2597     if (p == NULL)
2598         goto error;
2599     writer.overallocate = 1;
2600 
2601     for (i=0; i < size; i++) {
2602         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2603         /* Map 32-bit characters to '\Uxxxxxxxx' */
2604         if (ch >= 0x10000) {
2605             /* -1: subtract 1 preallocated byte */
2606             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2607             if (p == NULL)
2608                 goto error;
2609 
2610             *p++ = '\\';
2611             *p++ = 'U';
2612             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2613             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2614             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2615             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2616             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2617             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2618             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2619             *p++ = Py_hexdigits[ch & 15];
2620         }
2621         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2622         else if (ch >= 256 ||
2623                  ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2624                  ch == 0x1a)
2625         {
2626             /* -1: subtract 1 preallocated byte */
2627             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2628             if (p == NULL)
2629                 goto error;
2630 
2631             *p++ = '\\';
2632             *p++ = 'u';
2633             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2634             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2635             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2636             *p++ = Py_hexdigits[ch & 15];
2637         }
2638         /* Copy everything else as-is */
2639         else
2640             *p++ = (char) ch;
2641     }
2642 
2643     return _PyBytesWriter_Finish(&writer, p);
2644 
2645 error:
2646     _PyBytesWriter_Dealloc(&writer);
2647     return NULL;
2648 }
2649 
2650 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2651 write_unicode_binary(PicklerObject *self, PyObject *obj)
2652 {
2653     char header[9];
2654     Py_ssize_t len;
2655     PyObject *encoded = NULL;
2656     Py_ssize_t size;
2657     const char *data;
2658 
2659     if (PyUnicode_READY(obj))
2660         return -1;
2661 
2662     data = PyUnicode_AsUTF8AndSize(obj, &size);
2663     if (data == NULL) {
2664         /* Issue #8383: for strings with lone surrogates, fallback on the
2665            "surrogatepass" error handler. */
2666         PyErr_Clear();
2667         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2668         if (encoded == NULL)
2669             return -1;
2670 
2671         data = PyBytes_AS_STRING(encoded);
2672         size = PyBytes_GET_SIZE(encoded);
2673     }
2674 
2675     assert(size >= 0);
2676     if (size <= 0xff && self->proto >= 4) {
2677         header[0] = SHORT_BINUNICODE;
2678         header[1] = (unsigned char)(size & 0xff);
2679         len = 2;
2680     }
2681     else if ((size_t)size <= 0xffffffffUL) {
2682         header[0] = BINUNICODE;
2683         header[1] = (unsigned char)(size & 0xff);
2684         header[2] = (unsigned char)((size >> 8) & 0xff);
2685         header[3] = (unsigned char)((size >> 16) & 0xff);
2686         header[4] = (unsigned char)((size >> 24) & 0xff);
2687         len = 5;
2688     }
2689     else if (self->proto >= 4) {
2690         header[0] = BINUNICODE8;
2691         _write_size64(header + 1, size);
2692         len = 9;
2693     }
2694     else {
2695         PyErr_SetString(PyExc_OverflowError,
2696                         "serializing a string larger than 4 GiB "
2697                         "requires pickle protocol 4 or higher");
2698         Py_XDECREF(encoded);
2699         return -1;
2700     }
2701 
2702     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2703         Py_XDECREF(encoded);
2704         return -1;
2705     }
2706     Py_XDECREF(encoded);
2707     return 0;
2708 }
2709 
2710 static int
save_unicode(PicklerObject * self,PyObject * obj)2711 save_unicode(PicklerObject *self, PyObject *obj)
2712 {
2713     if (self->bin) {
2714         if (write_unicode_binary(self, obj) < 0)
2715             return -1;
2716     }
2717     else {
2718         PyObject *encoded;
2719         Py_ssize_t size;
2720         const char unicode_op = UNICODE;
2721 
2722         encoded = raw_unicode_escape(obj);
2723         if (encoded == NULL)
2724             return -1;
2725 
2726         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2727             Py_DECREF(encoded);
2728             return -1;
2729         }
2730 
2731         size = PyBytes_GET_SIZE(encoded);
2732         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2733             Py_DECREF(encoded);
2734             return -1;
2735         }
2736         Py_DECREF(encoded);
2737 
2738         if (_Pickler_Write(self, "\n", 1) < 0)
2739             return -1;
2740     }
2741     if (memo_put(self, obj) < 0)
2742         return -1;
2743 
2744     return 0;
2745 }
2746 
2747 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2748 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2749 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2750 {
2751     Py_ssize_t i;
2752 
2753     assert(PyTuple_Size(t) == len);
2754 
2755     for (i = 0; i < len; i++) {
2756         PyObject *element = PyTuple_GET_ITEM(t, i);
2757 
2758         if (element == NULL)
2759             return -1;
2760         if (save(self, element, 0) < 0)
2761             return -1;
2762     }
2763 
2764     return 0;
2765 }
2766 
2767 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2768  * used across protocols to minimize the space needed to pickle them.
2769  * Tuples are also the only builtin immutable type that can be recursive
2770  * (a tuple can be reached from itself), and that requires some subtle
2771  * magic so that it works in all cases.  IOW, this is a long routine.
2772  */
2773 static int
save_tuple(PicklerObject * self,PyObject * obj)2774 save_tuple(PicklerObject *self, PyObject *obj)
2775 {
2776     Py_ssize_t len, i;
2777 
2778     const char mark_op = MARK;
2779     const char tuple_op = TUPLE;
2780     const char pop_op = POP;
2781     const char pop_mark_op = POP_MARK;
2782     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2783 
2784     if ((len = PyTuple_Size(obj)) < 0)
2785         return -1;
2786 
2787     if (len == 0) {
2788         char pdata[2];
2789 
2790         if (self->proto) {
2791             pdata[0] = EMPTY_TUPLE;
2792             len = 1;
2793         }
2794         else {
2795             pdata[0] = MARK;
2796             pdata[1] = TUPLE;
2797             len = 2;
2798         }
2799         if (_Pickler_Write(self, pdata, len) < 0)
2800             return -1;
2801         return 0;
2802     }
2803 
2804     /* The tuple isn't in the memo now.  If it shows up there after
2805      * saving the tuple elements, the tuple must be recursive, in
2806      * which case we'll pop everything we put on the stack, and fetch
2807      * its value from the memo.
2808      */
2809     if (len <= 3 && self->proto >= 2) {
2810         /* Use TUPLE{1,2,3} opcodes. */
2811         if (store_tuple_elements(self, obj, len) < 0)
2812             return -1;
2813 
2814         if (PyMemoTable_Get(self->memo, obj)) {
2815             /* pop the len elements */
2816             for (i = 0; i < len; i++)
2817                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2818                     return -1;
2819             /* fetch from memo */
2820             if (memo_get(self, obj) < 0)
2821                 return -1;
2822 
2823             return 0;
2824         }
2825         else { /* Not recursive. */
2826             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2827                 return -1;
2828         }
2829         goto memoize;
2830     }
2831 
2832     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2833      * Generate MARK e1 e2 ... TUPLE
2834      */
2835     if (_Pickler_Write(self, &mark_op, 1) < 0)
2836         return -1;
2837 
2838     if (store_tuple_elements(self, obj, len) < 0)
2839         return -1;
2840 
2841     if (PyMemoTable_Get(self->memo, obj)) {
2842         /* pop the stack stuff we pushed */
2843         if (self->bin) {
2844             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2845                 return -1;
2846         }
2847         else {
2848             /* Note that we pop one more than len, to remove
2849              * the MARK too.
2850              */
2851             for (i = 0; i <= len; i++)
2852                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2853                     return -1;
2854         }
2855         /* fetch from memo */
2856         if (memo_get(self, obj) < 0)
2857             return -1;
2858 
2859         return 0;
2860     }
2861     else { /* Not recursive. */
2862         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2863             return -1;
2864     }
2865 
2866   memoize:
2867     if (memo_put(self, obj) < 0)
2868         return -1;
2869 
2870     return 0;
2871 }
2872 
2873 /* iter is an iterator giving items, and we batch up chunks of
2874  *     MARK item item ... item APPENDS
2875  * opcode sequences.  Calling code should have arranged to first create an
2876  * empty list, or list-like object, for the APPENDS to operate on.
2877  * Returns 0 on success, <0 on error.
2878  */
2879 static int
batch_list(PicklerObject * self,PyObject * iter)2880 batch_list(PicklerObject *self, PyObject *iter)
2881 {
2882     PyObject *obj = NULL;
2883     PyObject *firstitem = NULL;
2884     int i, n;
2885 
2886     const char mark_op = MARK;
2887     const char append_op = APPEND;
2888     const char appends_op = APPENDS;
2889 
2890     assert(iter != NULL);
2891 
2892     /* XXX: I think this function could be made faster by avoiding the
2893        iterator interface and fetching objects directly from list using
2894        PyList_GET_ITEM.
2895     */
2896 
2897     if (self->proto == 0) {
2898         /* APPENDS isn't available; do one at a time. */
2899         for (;;) {
2900             obj = PyIter_Next(iter);
2901             if (obj == NULL) {
2902                 if (PyErr_Occurred())
2903                     return -1;
2904                 break;
2905             }
2906             i = save(self, obj, 0);
2907             Py_DECREF(obj);
2908             if (i < 0)
2909                 return -1;
2910             if (_Pickler_Write(self, &append_op, 1) < 0)
2911                 return -1;
2912         }
2913         return 0;
2914     }
2915 
2916     /* proto > 0:  write in batches of BATCHSIZE. */
2917     do {
2918         /* Get first item */
2919         firstitem = PyIter_Next(iter);
2920         if (firstitem == NULL) {
2921             if (PyErr_Occurred())
2922                 goto error;
2923 
2924             /* nothing more to add */
2925             break;
2926         }
2927 
2928         /* Try to get a second item */
2929         obj = PyIter_Next(iter);
2930         if (obj == NULL) {
2931             if (PyErr_Occurred())
2932                 goto error;
2933 
2934             /* Only one item to write */
2935             if (save(self, firstitem, 0) < 0)
2936                 goto error;
2937             if (_Pickler_Write(self, &append_op, 1) < 0)
2938                 goto error;
2939             Py_CLEAR(firstitem);
2940             break;
2941         }
2942 
2943         /* More than one item to write */
2944 
2945         /* Pump out MARK, items, APPENDS. */
2946         if (_Pickler_Write(self, &mark_op, 1) < 0)
2947             goto error;
2948 
2949         if (save(self, firstitem, 0) < 0)
2950             goto error;
2951         Py_CLEAR(firstitem);
2952         n = 1;
2953 
2954         /* Fetch and save up to BATCHSIZE items */
2955         while (obj) {
2956             if (save(self, obj, 0) < 0)
2957                 goto error;
2958             Py_CLEAR(obj);
2959             n += 1;
2960 
2961             if (n == BATCHSIZE)
2962                 break;
2963 
2964             obj = PyIter_Next(iter);
2965             if (obj == NULL) {
2966                 if (PyErr_Occurred())
2967                     goto error;
2968                 break;
2969             }
2970         }
2971 
2972         if (_Pickler_Write(self, &appends_op, 1) < 0)
2973             goto error;
2974 
2975     } while (n == BATCHSIZE);
2976     return 0;
2977 
2978   error:
2979     Py_XDECREF(firstitem);
2980     Py_XDECREF(obj);
2981     return -1;
2982 }
2983 
2984 /* This is a variant of batch_list() above, specialized for lists (with no
2985  * support for list subclasses). Like batch_list(), we batch up chunks of
2986  *     MARK item item ... item APPENDS
2987  * opcode sequences.  Calling code should have arranged to first create an
2988  * empty list, or list-like object, for the APPENDS to operate on.
2989  * Returns 0 on success, -1 on error.
2990  *
2991  * This version is considerably faster than batch_list(), if less general.
2992  *
2993  * Note that this only works for protocols > 0.
2994  */
2995 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2996 batch_list_exact(PicklerObject *self, PyObject *obj)
2997 {
2998     PyObject *item = NULL;
2999     Py_ssize_t this_batch, total;
3000 
3001     const char append_op = APPEND;
3002     const char appends_op = APPENDS;
3003     const char mark_op = MARK;
3004 
3005     assert(obj != NULL);
3006     assert(self->proto > 0);
3007     assert(PyList_CheckExact(obj));
3008 
3009     if (PyList_GET_SIZE(obj) == 1) {
3010         item = PyList_GET_ITEM(obj, 0);
3011         if (save(self, item, 0) < 0)
3012             return -1;
3013         if (_Pickler_Write(self, &append_op, 1) < 0)
3014             return -1;
3015         return 0;
3016     }
3017 
3018     /* Write in batches of BATCHSIZE. */
3019     total = 0;
3020     do {
3021         this_batch = 0;
3022         if (_Pickler_Write(self, &mark_op, 1) < 0)
3023             return -1;
3024         while (total < PyList_GET_SIZE(obj)) {
3025             item = PyList_GET_ITEM(obj, total);
3026             if (save(self, item, 0) < 0)
3027                 return -1;
3028             total++;
3029             if (++this_batch == BATCHSIZE)
3030                 break;
3031         }
3032         if (_Pickler_Write(self, &appends_op, 1) < 0)
3033             return -1;
3034 
3035     } while (total < PyList_GET_SIZE(obj));
3036 
3037     return 0;
3038 }
3039 
3040 static int
save_list(PicklerObject * self,PyObject * obj)3041 save_list(PicklerObject *self, PyObject *obj)
3042 {
3043     char header[3];
3044     Py_ssize_t len;
3045     int status = 0;
3046 
3047     if (self->fast && !fast_save_enter(self, obj))
3048         goto error;
3049 
3050     /* Create an empty list. */
3051     if (self->bin) {
3052         header[0] = EMPTY_LIST;
3053         len = 1;
3054     }
3055     else {
3056         header[0] = MARK;
3057         header[1] = LIST;
3058         len = 2;
3059     }
3060 
3061     if (_Pickler_Write(self, header, len) < 0)
3062         goto error;
3063 
3064     /* Get list length, and bow out early if empty. */
3065     if ((len = PyList_Size(obj)) < 0)
3066         goto error;
3067 
3068     if (memo_put(self, obj) < 0)
3069         goto error;
3070 
3071     if (len != 0) {
3072         /* Materialize the list elements. */
3073         if (PyList_CheckExact(obj) && self->proto > 0) {
3074             if (Py_EnterRecursiveCall(" while pickling an object"))
3075                 goto error;
3076             status = batch_list_exact(self, obj);
3077             Py_LeaveRecursiveCall();
3078         } else {
3079             PyObject *iter = PyObject_GetIter(obj);
3080             if (iter == NULL)
3081                 goto error;
3082 
3083             if (Py_EnterRecursiveCall(" while pickling an object")) {
3084                 Py_DECREF(iter);
3085                 goto error;
3086             }
3087             status = batch_list(self, iter);
3088             Py_LeaveRecursiveCall();
3089             Py_DECREF(iter);
3090         }
3091     }
3092     if (0) {
3093   error:
3094         status = -1;
3095     }
3096 
3097     if (self->fast && !fast_save_leave(self, obj))
3098         status = -1;
3099 
3100     return status;
3101 }
3102 
3103 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3104  *     MARK key value ... key value SETITEMS
3105  * opcode sequences.  Calling code should have arranged to first create an
3106  * empty dict, or dict-like object, for the SETITEMS to operate on.
3107  * Returns 0 on success, <0 on error.
3108  *
3109  * This is very much like batch_list().  The difference between saving
3110  * elements directly, and picking apart two-tuples, is so long-winded at
3111  * the C level, though, that attempts to combine these routines were too
3112  * ugly to bear.
3113  */
3114 static int
batch_dict(PicklerObject * self,PyObject * iter)3115 batch_dict(PicklerObject *self, PyObject *iter)
3116 {
3117     PyObject *obj = NULL;
3118     PyObject *firstitem = NULL;
3119     int i, n;
3120 
3121     const char mark_op = MARK;
3122     const char setitem_op = SETITEM;
3123     const char setitems_op = SETITEMS;
3124 
3125     assert(iter != NULL);
3126 
3127     if (self->proto == 0) {
3128         /* SETITEMS isn't available; do one at a time. */
3129         for (;;) {
3130             obj = PyIter_Next(iter);
3131             if (obj == NULL) {
3132                 if (PyErr_Occurred())
3133                     return -1;
3134                 break;
3135             }
3136             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3137                 PyErr_SetString(PyExc_TypeError, "dict items "
3138                                 "iterator must return 2-tuples");
3139                 return -1;
3140             }
3141             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3142             if (i >= 0)
3143                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3144             Py_DECREF(obj);
3145             if (i < 0)
3146                 return -1;
3147             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3148                 return -1;
3149         }
3150         return 0;
3151     }
3152 
3153     /* proto > 0:  write in batches of BATCHSIZE. */
3154     do {
3155         /* Get first item */
3156         firstitem = PyIter_Next(iter);
3157         if (firstitem == NULL) {
3158             if (PyErr_Occurred())
3159                 goto error;
3160 
3161             /* nothing more to add */
3162             break;
3163         }
3164         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3165             PyErr_SetString(PyExc_TypeError, "dict items "
3166                                 "iterator must return 2-tuples");
3167             goto error;
3168         }
3169 
3170         /* Try to get a second item */
3171         obj = PyIter_Next(iter);
3172         if (obj == NULL) {
3173             if (PyErr_Occurred())
3174                 goto error;
3175 
3176             /* Only one item to write */
3177             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3178                 goto error;
3179             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3180                 goto error;
3181             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3182                 goto error;
3183             Py_CLEAR(firstitem);
3184             break;
3185         }
3186 
3187         /* More than one item to write */
3188 
3189         /* Pump out MARK, items, SETITEMS. */
3190         if (_Pickler_Write(self, &mark_op, 1) < 0)
3191             goto error;
3192 
3193         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3194             goto error;
3195         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3196             goto error;
3197         Py_CLEAR(firstitem);
3198         n = 1;
3199 
3200         /* Fetch and save up to BATCHSIZE items */
3201         while (obj) {
3202             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3203                 PyErr_SetString(PyExc_TypeError, "dict items "
3204                     "iterator must return 2-tuples");
3205                 goto error;
3206             }
3207             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3208                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3209                 goto error;
3210             Py_CLEAR(obj);
3211             n += 1;
3212 
3213             if (n == BATCHSIZE)
3214                 break;
3215 
3216             obj = PyIter_Next(iter);
3217             if (obj == NULL) {
3218                 if (PyErr_Occurred())
3219                     goto error;
3220                 break;
3221             }
3222         }
3223 
3224         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3225             goto error;
3226 
3227     } while (n == BATCHSIZE);
3228     return 0;
3229 
3230   error:
3231     Py_XDECREF(firstitem);
3232     Py_XDECREF(obj);
3233     return -1;
3234 }
3235 
3236 /* This is a variant of batch_dict() above that specializes for dicts, with no
3237  * support for dict subclasses. Like batch_dict(), we batch up chunks of
3238  *     MARK key value ... key value SETITEMS
3239  * opcode sequences.  Calling code should have arranged to first create an
3240  * empty dict, or dict-like object, for the SETITEMS to operate on.
3241  * Returns 0 on success, -1 on error.
3242  *
3243  * Note that this currently doesn't work for protocol 0.
3244  */
3245 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3246 batch_dict_exact(PicklerObject *self, PyObject *obj)
3247 {
3248     PyObject *key = NULL, *value = NULL;
3249     int i;
3250     Py_ssize_t dict_size, ppos = 0;
3251 
3252     const char mark_op = MARK;
3253     const char setitem_op = SETITEM;
3254     const char setitems_op = SETITEMS;
3255 
3256     assert(obj != NULL && PyDict_CheckExact(obj));
3257     assert(self->proto > 0);
3258 
3259     dict_size = PyDict_GET_SIZE(obj);
3260 
3261     /* Special-case len(d) == 1 to save space. */
3262     if (dict_size == 1) {
3263         PyDict_Next(obj, &ppos, &key, &value);
3264         if (save(self, key, 0) < 0)
3265             return -1;
3266         if (save(self, value, 0) < 0)
3267             return -1;
3268         if (_Pickler_Write(self, &setitem_op, 1) < 0)
3269             return -1;
3270         return 0;
3271     }
3272 
3273     /* Write in batches of BATCHSIZE. */
3274     do {
3275         i = 0;
3276         if (_Pickler_Write(self, &mark_op, 1) < 0)
3277             return -1;
3278         while (PyDict_Next(obj, &ppos, &key, &value)) {
3279             if (save(self, key, 0) < 0)
3280                 return -1;
3281             if (save(self, value, 0) < 0)
3282                 return -1;
3283             if (++i == BATCHSIZE)
3284                 break;
3285         }
3286         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3287             return -1;
3288         if (PyDict_GET_SIZE(obj) != dict_size) {
3289             PyErr_Format(
3290                 PyExc_RuntimeError,
3291                 "dictionary changed size during iteration");
3292             return -1;
3293         }
3294 
3295     } while (i == BATCHSIZE);
3296     return 0;
3297 }
3298 
3299 static int
save_dict(PicklerObject * self,PyObject * obj)3300 save_dict(PicklerObject *self, PyObject *obj)
3301 {
3302     PyObject *items, *iter;
3303     char header[3];
3304     Py_ssize_t len;
3305     int status = 0;
3306     assert(PyDict_Check(obj));
3307 
3308     if (self->fast && !fast_save_enter(self, obj))
3309         goto error;
3310 
3311     /* Create an empty dict. */
3312     if (self->bin) {
3313         header[0] = EMPTY_DICT;
3314         len = 1;
3315     }
3316     else {
3317         header[0] = MARK;
3318         header[1] = DICT;
3319         len = 2;
3320     }
3321 
3322     if (_Pickler_Write(self, header, len) < 0)
3323         goto error;
3324 
3325     if (memo_put(self, obj) < 0)
3326         goto error;
3327 
3328     if (PyDict_GET_SIZE(obj)) {
3329         /* Save the dict items. */
3330         if (PyDict_CheckExact(obj) && self->proto > 0) {
3331             /* We can take certain shortcuts if we know this is a dict and
3332                not a dict subclass. */
3333             if (Py_EnterRecursiveCall(" while pickling an object"))
3334                 goto error;
3335             status = batch_dict_exact(self, obj);
3336             Py_LeaveRecursiveCall();
3337         } else {
3338             _Py_IDENTIFIER(items);
3339 
3340             items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3341             if (items == NULL)
3342                 goto error;
3343             iter = PyObject_GetIter(items);
3344             Py_DECREF(items);
3345             if (iter == NULL)
3346                 goto error;
3347             if (Py_EnterRecursiveCall(" while pickling an object")) {
3348                 Py_DECREF(iter);
3349                 goto error;
3350             }
3351             status = batch_dict(self, iter);
3352             Py_LeaveRecursiveCall();
3353             Py_DECREF(iter);
3354         }
3355     }
3356 
3357     if (0) {
3358   error:
3359         status = -1;
3360     }
3361 
3362     if (self->fast && !fast_save_leave(self, obj))
3363         status = -1;
3364 
3365     return status;
3366 }
3367 
3368 static int
save_set(PicklerObject * self,PyObject * obj)3369 save_set(PicklerObject *self, PyObject *obj)
3370 {
3371     PyObject *item;
3372     int i;
3373     Py_ssize_t set_size, ppos = 0;
3374     Py_hash_t hash;
3375 
3376     const char empty_set_op = EMPTY_SET;
3377     const char mark_op = MARK;
3378     const char additems_op = ADDITEMS;
3379 
3380     if (self->proto < 4) {
3381         PyObject *items;
3382         PyObject *reduce_value;
3383         int status;
3384 
3385         items = PySequence_List(obj);
3386         if (items == NULL) {
3387             return -1;
3388         }
3389         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3390         Py_DECREF(items);
3391         if (reduce_value == NULL) {
3392             return -1;
3393         }
3394         /* save_reduce() will memoize the object automatically. */
3395         status = save_reduce(self, reduce_value, obj);
3396         Py_DECREF(reduce_value);
3397         return status;
3398     }
3399 
3400     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3401         return -1;
3402 
3403     if (memo_put(self, obj) < 0)
3404         return -1;
3405 
3406     set_size = PySet_GET_SIZE(obj);
3407     if (set_size == 0)
3408         return 0;  /* nothing to do */
3409 
3410     /* Write in batches of BATCHSIZE. */
3411     do {
3412         i = 0;
3413         if (_Pickler_Write(self, &mark_op, 1) < 0)
3414             return -1;
3415         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3416             if (save(self, item, 0) < 0)
3417                 return -1;
3418             if (++i == BATCHSIZE)
3419                 break;
3420         }
3421         if (_Pickler_Write(self, &additems_op, 1) < 0)
3422             return -1;
3423         if (PySet_GET_SIZE(obj) != set_size) {
3424             PyErr_Format(
3425                 PyExc_RuntimeError,
3426                 "set changed size during iteration");
3427             return -1;
3428         }
3429     } while (i == BATCHSIZE);
3430 
3431     return 0;
3432 }
3433 
3434 static int
save_frozenset(PicklerObject * self,PyObject * obj)3435 save_frozenset(PicklerObject *self, PyObject *obj)
3436 {
3437     PyObject *iter;
3438 
3439     const char mark_op = MARK;
3440     const char frozenset_op = FROZENSET;
3441 
3442     if (self->fast && !fast_save_enter(self, obj))
3443         return -1;
3444 
3445     if (self->proto < 4) {
3446         PyObject *items;
3447         PyObject *reduce_value;
3448         int status;
3449 
3450         items = PySequence_List(obj);
3451         if (items == NULL) {
3452             return -1;
3453         }
3454         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3455                                      items);
3456         Py_DECREF(items);
3457         if (reduce_value == NULL) {
3458             return -1;
3459         }
3460         /* save_reduce() will memoize the object automatically. */
3461         status = save_reduce(self, reduce_value, obj);
3462         Py_DECREF(reduce_value);
3463         return status;
3464     }
3465 
3466     if (_Pickler_Write(self, &mark_op, 1) < 0)
3467         return -1;
3468 
3469     iter = PyObject_GetIter(obj);
3470     if (iter == NULL) {
3471         return -1;
3472     }
3473     for (;;) {
3474         PyObject *item;
3475 
3476         item = PyIter_Next(iter);
3477         if (item == NULL) {
3478             if (PyErr_Occurred()) {
3479                 Py_DECREF(iter);
3480                 return -1;
3481             }
3482             break;
3483         }
3484         if (save(self, item, 0) < 0) {
3485             Py_DECREF(item);
3486             Py_DECREF(iter);
3487             return -1;
3488         }
3489         Py_DECREF(item);
3490     }
3491     Py_DECREF(iter);
3492 
3493     /* If the object is already in the memo, this means it is
3494        recursive. In this case, throw away everything we put on the
3495        stack, and fetch the object back from the memo. */
3496     if (PyMemoTable_Get(self->memo, obj)) {
3497         const char pop_mark_op = POP_MARK;
3498 
3499         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3500             return -1;
3501         if (memo_get(self, obj) < 0)
3502             return -1;
3503         return 0;
3504     }
3505 
3506     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3507         return -1;
3508     if (memo_put(self, obj) < 0)
3509         return -1;
3510 
3511     return 0;
3512 }
3513 
3514 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3515 fix_imports(PyObject **module_name, PyObject **global_name)
3516 {
3517     PyObject *key;
3518     PyObject *item;
3519     PickleState *st = _Pickle_GetGlobalState();
3520 
3521     key = PyTuple_Pack(2, *module_name, *global_name);
3522     if (key == NULL)
3523         return -1;
3524     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3525     Py_DECREF(key);
3526     if (item) {
3527         PyObject *fixed_module_name;
3528         PyObject *fixed_global_name;
3529 
3530         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3531             PyErr_Format(PyExc_RuntimeError,
3532                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3533                          "should be 2-tuples, not %.200s",
3534                          Py_TYPE(item)->tp_name);
3535             return -1;
3536         }
3537         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3538         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3539         if (!PyUnicode_Check(fixed_module_name) ||
3540             !PyUnicode_Check(fixed_global_name)) {
3541             PyErr_Format(PyExc_RuntimeError,
3542                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3543                          "should be pairs of str, not (%.200s, %.200s)",
3544                          Py_TYPE(fixed_module_name)->tp_name,
3545                          Py_TYPE(fixed_global_name)->tp_name);
3546             return -1;
3547         }
3548 
3549         Py_CLEAR(*module_name);
3550         Py_CLEAR(*global_name);
3551         Py_INCREF(fixed_module_name);
3552         Py_INCREF(fixed_global_name);
3553         *module_name = fixed_module_name;
3554         *global_name = fixed_global_name;
3555         return 0;
3556     }
3557     else if (PyErr_Occurred()) {
3558         return -1;
3559     }
3560 
3561     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3562     if (item) {
3563         if (!PyUnicode_Check(item)) {
3564             PyErr_Format(PyExc_RuntimeError,
3565                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3566                          "should be strings, not %.200s",
3567                          Py_TYPE(item)->tp_name);
3568             return -1;
3569         }
3570         Py_INCREF(item);
3571         Py_XSETREF(*module_name, item);
3572     }
3573     else if (PyErr_Occurred()) {
3574         return -1;
3575     }
3576 
3577     return 0;
3578 }
3579 
3580 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3581 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3582 {
3583     PyObject *global_name = NULL;
3584     PyObject *module_name = NULL;
3585     PyObject *module = NULL;
3586     PyObject *parent = NULL;
3587     PyObject *dotted_path = NULL;
3588     PyObject *lastname = NULL;
3589     PyObject *cls;
3590     PickleState *st = _Pickle_GetGlobalState();
3591     int status = 0;
3592     _Py_IDENTIFIER(__name__);
3593     _Py_IDENTIFIER(__qualname__);
3594 
3595     const char global_op = GLOBAL;
3596 
3597     if (name) {
3598         Py_INCREF(name);
3599         global_name = name;
3600     }
3601     else {
3602         if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3603             goto error;
3604         if (global_name == NULL) {
3605             global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3606             if (global_name == NULL)
3607                 goto error;
3608         }
3609     }
3610 
3611     dotted_path = get_dotted_path(module, global_name);
3612     if (dotted_path == NULL)
3613         goto error;
3614     module_name = whichmodule(obj, dotted_path);
3615     if (module_name == NULL)
3616         goto error;
3617 
3618     /* XXX: Change to use the import C API directly with level=0 to disallow
3619        relative imports.
3620 
3621        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3622        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3623        custom import functions (IMHO, this would be a nice security
3624        feature). The import C API would need to be extended to support the
3625        extra parameters of __import__ to fix that. */
3626     module = PyImport_Import(module_name);
3627     if (module == NULL) {
3628         PyErr_Format(st->PicklingError,
3629                      "Can't pickle %R: import of module %R failed",
3630                      obj, module_name);
3631         goto error;
3632     }
3633     lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3634     Py_INCREF(lastname);
3635     cls = get_deep_attribute(module, dotted_path, &parent);
3636     Py_CLEAR(dotted_path);
3637     if (cls == NULL) {
3638         PyErr_Format(st->PicklingError,
3639                      "Can't pickle %R: attribute lookup %S on %S failed",
3640                      obj, global_name, module_name);
3641         goto error;
3642     }
3643     if (cls != obj) {
3644         Py_DECREF(cls);
3645         PyErr_Format(st->PicklingError,
3646                      "Can't pickle %R: it's not the same object as %S.%S",
3647                      obj, module_name, global_name);
3648         goto error;
3649     }
3650     Py_DECREF(cls);
3651 
3652     if (self->proto >= 2) {
3653         /* See whether this is in the extension registry, and if
3654          * so generate an EXT opcode.
3655          */
3656         PyObject *extension_key;
3657         PyObject *code_obj;      /* extension code as Python object */
3658         long code;               /* extension code as C value */
3659         char pdata[5];
3660         Py_ssize_t n;
3661 
3662         extension_key = PyTuple_Pack(2, module_name, global_name);
3663         if (extension_key == NULL) {
3664             goto error;
3665         }
3666         code_obj = PyDict_GetItemWithError(st->extension_registry,
3667                                            extension_key);
3668         Py_DECREF(extension_key);
3669         /* The object is not registered in the extension registry.
3670            This is the most likely code path. */
3671         if (code_obj == NULL) {
3672             if (PyErr_Occurred()) {
3673                 goto error;
3674             }
3675             goto gen_global;
3676         }
3677 
3678         /* XXX: pickle.py doesn't check neither the type, nor the range
3679            of the value returned by the extension_registry. It should for
3680            consistency. */
3681 
3682         /* Verify code_obj has the right type and value. */
3683         if (!PyLong_Check(code_obj)) {
3684             PyErr_Format(st->PicklingError,
3685                          "Can't pickle %R: extension code %R isn't an integer",
3686                          obj, code_obj);
3687             goto error;
3688         }
3689         code = PyLong_AS_LONG(code_obj);
3690         if (code <= 0 || code > 0x7fffffffL) {
3691             if (!PyErr_Occurred())
3692                 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3693                              "code %ld is out of range", obj, code);
3694             goto error;
3695         }
3696 
3697         /* Generate an EXT opcode. */
3698         if (code <= 0xff) {
3699             pdata[0] = EXT1;
3700             pdata[1] = (unsigned char)code;
3701             n = 2;
3702         }
3703         else if (code <= 0xffff) {
3704             pdata[0] = EXT2;
3705             pdata[1] = (unsigned char)(code & 0xff);
3706             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3707             n = 3;
3708         }
3709         else {
3710             pdata[0] = EXT4;
3711             pdata[1] = (unsigned char)(code & 0xff);
3712             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3713             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3714             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3715             n = 5;
3716         }
3717 
3718         if (_Pickler_Write(self, pdata, n) < 0)
3719             goto error;
3720     }
3721     else {
3722   gen_global:
3723         if (parent == module) {
3724             Py_INCREF(lastname);
3725             Py_DECREF(global_name);
3726             global_name = lastname;
3727         }
3728         if (self->proto >= 4) {
3729             const char stack_global_op = STACK_GLOBAL;
3730 
3731             if (save(self, module_name, 0) < 0)
3732                 goto error;
3733             if (save(self, global_name, 0) < 0)
3734                 goto error;
3735 
3736             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3737                 goto error;
3738         }
3739         else if (parent != module) {
3740             PickleState *st = _Pickle_GetGlobalState();
3741             PyObject *reduce_value = Py_BuildValue("(O(OO))",
3742                                         st->getattr, parent, lastname);
3743             if (reduce_value == NULL)
3744                 goto error;
3745             status = save_reduce(self, reduce_value, NULL);
3746             Py_DECREF(reduce_value);
3747             if (status < 0)
3748                 goto error;
3749         }
3750         else {
3751             /* Generate a normal global opcode if we are using a pickle
3752                protocol < 4, or if the object is not registered in the
3753                extension registry. */
3754             PyObject *encoded;
3755             PyObject *(*unicode_encoder)(PyObject *);
3756 
3757             if (_Pickler_Write(self, &global_op, 1) < 0)
3758                 goto error;
3759 
3760             /* For protocol < 3 and if the user didn't request against doing
3761                so, we convert module names to the old 2.x module names. */
3762             if (self->proto < 3 && self->fix_imports) {
3763                 if (fix_imports(&module_name, &global_name) < 0) {
3764                     goto error;
3765                 }
3766             }
3767 
3768             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3769                both the module name and the global name using UTF-8. We do so
3770                only when we are using the pickle protocol newer than version
3771                3. This is to ensure compatibility with older Unpickler running
3772                on Python 2.x. */
3773             if (self->proto == 3) {
3774                 unicode_encoder = PyUnicode_AsUTF8String;
3775             }
3776             else {
3777                 unicode_encoder = PyUnicode_AsASCIIString;
3778             }
3779             encoded = unicode_encoder(module_name);
3780             if (encoded == NULL) {
3781                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3782                     PyErr_Format(st->PicklingError,
3783                                  "can't pickle module identifier '%S' using "
3784                                  "pickle protocol %i",
3785                                  module_name, self->proto);
3786                 goto error;
3787             }
3788             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3789                                PyBytes_GET_SIZE(encoded)) < 0) {
3790                 Py_DECREF(encoded);
3791                 goto error;
3792             }
3793             Py_DECREF(encoded);
3794             if(_Pickler_Write(self, "\n", 1) < 0)
3795                 goto error;
3796 
3797             /* Save the name of the module. */
3798             encoded = unicode_encoder(global_name);
3799             if (encoded == NULL) {
3800                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3801                     PyErr_Format(st->PicklingError,
3802                                  "can't pickle global identifier '%S' using "
3803                                  "pickle protocol %i",
3804                                  global_name, self->proto);
3805                 goto error;
3806             }
3807             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3808                                PyBytes_GET_SIZE(encoded)) < 0) {
3809                 Py_DECREF(encoded);
3810                 goto error;
3811             }
3812             Py_DECREF(encoded);
3813             if (_Pickler_Write(self, "\n", 1) < 0)
3814                 goto error;
3815         }
3816         /* Memoize the object. */
3817         if (memo_put(self, obj) < 0)
3818             goto error;
3819     }
3820 
3821     if (0) {
3822   error:
3823         status = -1;
3824     }
3825     Py_XDECREF(module_name);
3826     Py_XDECREF(global_name);
3827     Py_XDECREF(module);
3828     Py_XDECREF(parent);
3829     Py_XDECREF(dotted_path);
3830     Py_XDECREF(lastname);
3831 
3832     return status;
3833 }
3834 
3835 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3836 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3837 {
3838     PyObject *reduce_value;
3839     int status;
3840 
3841     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3842     if (reduce_value == NULL) {
3843         return -1;
3844     }
3845     status = save_reduce(self, reduce_value, obj);
3846     Py_DECREF(reduce_value);
3847     return status;
3848 }
3849 
3850 static int
save_type(PicklerObject * self,PyObject * obj)3851 save_type(PicklerObject *self, PyObject *obj)
3852 {
3853     if (obj == (PyObject *)&_PyNone_Type) {
3854         return save_singleton_type(self, obj, Py_None);
3855     }
3856     else if (obj == (PyObject *)&PyEllipsis_Type) {
3857         return save_singleton_type(self, obj, Py_Ellipsis);
3858     }
3859     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3860         return save_singleton_type(self, obj, Py_NotImplemented);
3861     }
3862     return save_global(self, obj, NULL);
3863 }
3864 
3865 static int
save_pers(PicklerObject * self,PyObject * obj)3866 save_pers(PicklerObject *self, PyObject *obj)
3867 {
3868     PyObject *pid = NULL;
3869     int status = 0;
3870 
3871     const char persid_op = PERSID;
3872     const char binpersid_op = BINPERSID;
3873 
3874     pid = call_method(self->pers_func, self->pers_func_self, obj);
3875     if (pid == NULL)
3876         return -1;
3877 
3878     if (pid != Py_None) {
3879         if (self->bin) {
3880             if (save(self, pid, 1) < 0 ||
3881                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3882                 goto error;
3883         }
3884         else {
3885             PyObject *pid_str;
3886 
3887             pid_str = PyObject_Str(pid);
3888             if (pid_str == NULL)
3889                 goto error;
3890 
3891             /* XXX: Should it check whether the pid contains embedded
3892                newlines? */
3893             if (!PyUnicode_IS_ASCII(pid_str)) {
3894                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3895                                 "persistent IDs in protocol 0 must be "
3896                                 "ASCII strings");
3897                 Py_DECREF(pid_str);
3898                 goto error;
3899             }
3900 
3901             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3902                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3903                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3904                 _Pickler_Write(self, "\n", 1) < 0) {
3905                 Py_DECREF(pid_str);
3906                 goto error;
3907             }
3908             Py_DECREF(pid_str);
3909         }
3910         status = 1;
3911     }
3912 
3913     if (0) {
3914   error:
3915         status = -1;
3916     }
3917     Py_XDECREF(pid);
3918 
3919     return status;
3920 }
3921 
3922 static PyObject *
get_class(PyObject * obj)3923 get_class(PyObject *obj)
3924 {
3925     PyObject *cls;
3926     _Py_IDENTIFIER(__class__);
3927 
3928     if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3929         cls = (PyObject *) Py_TYPE(obj);
3930         Py_INCREF(cls);
3931     }
3932     return cls;
3933 }
3934 
3935 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3936  * appropriate __reduce__ method for obj.
3937  */
3938 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3939 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3940 {
3941     PyObject *callable;
3942     PyObject *argtup;
3943     PyObject *state = NULL;
3944     PyObject *listitems = Py_None;
3945     PyObject *dictitems = Py_None;
3946     PyObject *state_setter = Py_None;
3947     PickleState *st = _Pickle_GetGlobalState();
3948     Py_ssize_t size;
3949     int use_newobj = 0, use_newobj_ex = 0;
3950 
3951     const char reduce_op = REDUCE;
3952     const char build_op = BUILD;
3953     const char newobj_op = NEWOBJ;
3954     const char newobj_ex_op = NEWOBJ_EX;
3955 
3956     size = PyTuple_Size(args);
3957     if (size < 2 || size > 6) {
3958         PyErr_SetString(st->PicklingError, "tuple returned by "
3959                         "__reduce__ must contain 2 through 6 elements");
3960         return -1;
3961     }
3962 
3963     if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3964                            &callable, &argtup, &state, &listitems, &dictitems,
3965                            &state_setter))
3966         return -1;
3967 
3968     if (!PyCallable_Check(callable)) {
3969         PyErr_SetString(st->PicklingError, "first item of the tuple "
3970                         "returned by __reduce__ must be callable");
3971         return -1;
3972     }
3973     if (!PyTuple_Check(argtup)) {
3974         PyErr_SetString(st->PicklingError, "second item of the tuple "
3975                         "returned by __reduce__ must be a tuple");
3976         return -1;
3977     }
3978 
3979     if (state == Py_None)
3980         state = NULL;
3981 
3982     if (listitems == Py_None)
3983         listitems = NULL;
3984     else if (!PyIter_Check(listitems)) {
3985         PyErr_Format(st->PicklingError, "fourth element of the tuple "
3986                      "returned by __reduce__ must be an iterator, not %s",
3987                      Py_TYPE(listitems)->tp_name);
3988         return -1;
3989     }
3990 
3991     if (dictitems == Py_None)
3992         dictitems = NULL;
3993     else if (!PyIter_Check(dictitems)) {
3994         PyErr_Format(st->PicklingError, "fifth element of the tuple "
3995                      "returned by __reduce__ must be an iterator, not %s",
3996                      Py_TYPE(dictitems)->tp_name);
3997         return -1;
3998     }
3999 
4000     if (state_setter == Py_None)
4001         state_setter = NULL;
4002     else if (!PyCallable_Check(state_setter)) {
4003         PyErr_Format(st->PicklingError, "sixth element of the tuple "
4004                      "returned by __reduce__ must be a function, not %s",
4005                      Py_TYPE(state_setter)->tp_name);
4006         return -1;
4007     }
4008 
4009     if (self->proto >= 2) {
4010         PyObject *name;
4011         _Py_IDENTIFIER(__name__);
4012 
4013         if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
4014             return -1;
4015         }
4016         if (name != NULL && PyUnicode_Check(name)) {
4017             _Py_IDENTIFIER(__newobj_ex__);
4018             use_newobj_ex = _PyUnicode_EqualToASCIIId(
4019                     name, &PyId___newobj_ex__);
4020             if (!use_newobj_ex) {
4021                 _Py_IDENTIFIER(__newobj__);
4022                 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
4023             }
4024         }
4025         Py_XDECREF(name);
4026     }
4027 
4028     if (use_newobj_ex) {
4029         PyObject *cls;
4030         PyObject *args;
4031         PyObject *kwargs;
4032 
4033         if (PyTuple_GET_SIZE(argtup) != 3) {
4034             PyErr_Format(st->PicklingError,
4035                          "length of the NEWOBJ_EX argument tuple must be "
4036                          "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4037             return -1;
4038         }
4039 
4040         cls = PyTuple_GET_ITEM(argtup, 0);
4041         if (!PyType_Check(cls)) {
4042             PyErr_Format(st->PicklingError,
4043                          "first item from NEWOBJ_EX argument tuple must "
4044                          "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4045             return -1;
4046         }
4047         args = PyTuple_GET_ITEM(argtup, 1);
4048         if (!PyTuple_Check(args)) {
4049             PyErr_Format(st->PicklingError,
4050                          "second item from NEWOBJ_EX argument tuple must "
4051                          "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4052             return -1;
4053         }
4054         kwargs = PyTuple_GET_ITEM(argtup, 2);
4055         if (!PyDict_Check(kwargs)) {
4056             PyErr_Format(st->PicklingError,
4057                          "third item from NEWOBJ_EX argument tuple must "
4058                          "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4059             return -1;
4060         }
4061 
4062         if (self->proto >= 4) {
4063             if (save(self, cls, 0) < 0 ||
4064                 save(self, args, 0) < 0 ||
4065                 save(self, kwargs, 0) < 0 ||
4066                 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4067                 return -1;
4068             }
4069         }
4070         else {
4071             PyObject *newargs;
4072             PyObject *cls_new;
4073             Py_ssize_t i;
4074             _Py_IDENTIFIER(__new__);
4075 
4076             newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4077             if (newargs == NULL)
4078                 return -1;
4079 
4080             cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4081             if (cls_new == NULL) {
4082                 Py_DECREF(newargs);
4083                 return -1;
4084             }
4085             PyTuple_SET_ITEM(newargs, 0, cls_new);
4086             Py_INCREF(cls);
4087             PyTuple_SET_ITEM(newargs, 1, cls);
4088             for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4089                 PyObject *item = PyTuple_GET_ITEM(args, i);
4090                 Py_INCREF(item);
4091                 PyTuple_SET_ITEM(newargs, i + 2, item);
4092             }
4093 
4094             callable = PyObject_Call(st->partial, newargs, kwargs);
4095             Py_DECREF(newargs);
4096             if (callable == NULL)
4097                 return -1;
4098 
4099             newargs = PyTuple_New(0);
4100             if (newargs == NULL) {
4101                 Py_DECREF(callable);
4102                 return -1;
4103             }
4104 
4105             if (save(self, callable, 0) < 0 ||
4106                 save(self, newargs, 0) < 0 ||
4107                 _Pickler_Write(self, &reduce_op, 1) < 0) {
4108                 Py_DECREF(newargs);
4109                 Py_DECREF(callable);
4110                 return -1;
4111             }
4112             Py_DECREF(newargs);
4113             Py_DECREF(callable);
4114         }
4115     }
4116     else if (use_newobj) {
4117         PyObject *cls;
4118         PyObject *newargtup;
4119         PyObject *obj_class;
4120         int p;
4121 
4122         /* Sanity checks. */
4123         if (PyTuple_GET_SIZE(argtup) < 1) {
4124             PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4125             return -1;
4126         }
4127 
4128         cls = PyTuple_GET_ITEM(argtup, 0);
4129         if (!PyType_Check(cls)) {
4130             PyErr_SetString(st->PicklingError, "args[0] from "
4131                             "__newobj__ args is not a type");
4132             return -1;
4133         }
4134 
4135         if (obj != NULL) {
4136             obj_class = get_class(obj);
4137             if (obj_class == NULL) {
4138                 return -1;
4139             }
4140             p = obj_class != cls;
4141             Py_DECREF(obj_class);
4142             if (p) {
4143                 PyErr_SetString(st->PicklingError, "args[0] from "
4144                                 "__newobj__ args has the wrong class");
4145                 return -1;
4146             }
4147         }
4148         /* XXX: These calls save() are prone to infinite recursion. Imagine
4149            what happen if the value returned by the __reduce__() method of
4150            some extension type contains another object of the same type. Ouch!
4151 
4152            Here is a quick example, that I ran into, to illustrate what I
4153            mean:
4154 
4155              >>> import pickle, copyreg
4156              >>> copyreg.dispatch_table.pop(complex)
4157              >>> pickle.dumps(1+2j)
4158              Traceback (most recent call last):
4159                ...
4160              RecursionError: maximum recursion depth exceeded
4161 
4162            Removing the complex class from copyreg.dispatch_table made the
4163            __reduce_ex__() method emit another complex object:
4164 
4165              >>> (1+1j).__reduce_ex__(2)
4166              (<function __newobj__ at 0xb7b71c3c>,
4167                (<class 'complex'>, (1+1j)), None, None, None)
4168 
4169            Thus when save() was called on newargstup (the 2nd item) recursion
4170            ensued. Of course, the bug was in the complex class which had a
4171            broken __getnewargs__() that emitted another complex object. But,
4172            the point, here, is it is quite easy to end up with a broken reduce
4173            function. */
4174 
4175         /* Save the class and its __new__ arguments. */
4176         if (save(self, cls, 0) < 0)
4177             return -1;
4178 
4179         newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4180         if (newargtup == NULL)
4181             return -1;
4182 
4183         p = save(self, newargtup, 0);
4184         Py_DECREF(newargtup);
4185         if (p < 0)
4186             return -1;
4187 
4188         /* Add NEWOBJ opcode. */
4189         if (_Pickler_Write(self, &newobj_op, 1) < 0)
4190             return -1;
4191     }
4192     else { /* Not using NEWOBJ. */
4193         if (save(self, callable, 0) < 0 ||
4194             save(self, argtup, 0) < 0 ||
4195             _Pickler_Write(self, &reduce_op, 1) < 0)
4196             return -1;
4197     }
4198 
4199     /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4200        the caller do not want to memoize the object. Not particularly useful,
4201        but that is to mimic the behavior save_reduce() in pickle.py when
4202        obj is None. */
4203     if (obj != NULL) {
4204         /* If the object is already in the memo, this means it is
4205            recursive. In this case, throw away everything we put on the
4206            stack, and fetch the object back from the memo. */
4207         if (PyMemoTable_Get(self->memo, obj)) {
4208             const char pop_op = POP;
4209 
4210             if (_Pickler_Write(self, &pop_op, 1) < 0)
4211                 return -1;
4212             if (memo_get(self, obj) < 0)
4213                 return -1;
4214 
4215             return 0;
4216         }
4217         else if (memo_put(self, obj) < 0)
4218             return -1;
4219     }
4220 
4221     if (listitems && batch_list(self, listitems) < 0)
4222         return -1;
4223 
4224     if (dictitems && batch_dict(self, dictitems) < 0)
4225         return -1;
4226 
4227     if (state) {
4228         if (state_setter == NULL) {
4229             if (save(self, state, 0) < 0 ||
4230                 _Pickler_Write(self, &build_op, 1) < 0)
4231                 return -1;
4232         }
4233         else {
4234 
4235             /* If a state_setter is specified, call it instead of load_build to
4236              * update obj's with its previous state.
4237              * The first 4 save/write instructions push state_setter and its
4238              * tuple of expected arguments (obj, state) onto the stack. The
4239              * REDUCE opcode triggers the state_setter(obj, state) function
4240              * call. Finally, because state-updating routines only do in-place
4241              * modification, the whole operation has to be stack-transparent.
4242              * Thus, we finally pop the call's output from the stack.*/
4243 
4244             const char tupletwo_op = TUPLE2;
4245             const char pop_op = POP;
4246             if (save(self, state_setter, 0) < 0 ||
4247                 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4248                 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4249                 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4250                 _Pickler_Write(self, &pop_op, 1) < 0)
4251                 return -1;
4252         }
4253     }
4254     return 0;
4255 }
4256 
4257 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4258 save(PicklerObject *self, PyObject *obj, int pers_save)
4259 {
4260     PyTypeObject *type;
4261     PyObject *reduce_func = NULL;
4262     PyObject *reduce_value = NULL;
4263     int status = 0;
4264 
4265     if (_Pickler_OpcodeBoundary(self) < 0)
4266         return -1;
4267 
4268     /* The extra pers_save argument is necessary to avoid calling save_pers()
4269        on its returned object. */
4270     if (!pers_save && self->pers_func) {
4271         /* save_pers() returns:
4272             -1   to signal an error;
4273              0   if it did nothing successfully;
4274              1   if a persistent id was saved.
4275          */
4276         if ((status = save_pers(self, obj)) != 0)
4277             return status;
4278     }
4279 
4280     type = Py_TYPE(obj);
4281 
4282     /* The old cPickle had an optimization that used switch-case statement
4283        dispatching on the first letter of the type name.  This has was removed
4284        since benchmarks shown that this optimization was actually slowing
4285        things down. */
4286 
4287     /* Atom types; these aren't memoized, so don't check the memo. */
4288 
4289     if (obj == Py_None) {
4290         return save_none(self, obj);
4291     }
4292     else if (obj == Py_False || obj == Py_True) {
4293         return save_bool(self, obj);
4294     }
4295     else if (type == &PyLong_Type) {
4296         return save_long(self, obj);
4297     }
4298     else if (type == &PyFloat_Type) {
4299         return save_float(self, obj);
4300     }
4301 
4302     /* Check the memo to see if it has the object. If so, generate
4303        a GET (or BINGET) opcode, instead of pickling the object
4304        once again. */
4305     if (PyMemoTable_Get(self->memo, obj)) {
4306         return memo_get(self, obj);
4307     }
4308 
4309     if (type == &PyBytes_Type) {
4310         return save_bytes(self, obj);
4311     }
4312     else if (type == &PyUnicode_Type) {
4313         return save_unicode(self, obj);
4314     }
4315 
4316     /* We're only calling Py_EnterRecursiveCall here so that atomic
4317        types above are pickled faster. */
4318     if (Py_EnterRecursiveCall(" while pickling an object")) {
4319         return -1;
4320     }
4321 
4322     if (type == &PyDict_Type) {
4323         status = save_dict(self, obj);
4324         goto done;
4325     }
4326     else if (type == &PySet_Type) {
4327         status = save_set(self, obj);
4328         goto done;
4329     }
4330     else if (type == &PyFrozenSet_Type) {
4331         status = save_frozenset(self, obj);
4332         goto done;
4333     }
4334     else if (type == &PyList_Type) {
4335         status = save_list(self, obj);
4336         goto done;
4337     }
4338     else if (type == &PyTuple_Type) {
4339         status = save_tuple(self, obj);
4340         goto done;
4341     }
4342     else if (type == &PyByteArray_Type) {
4343         status = save_bytearray(self, obj);
4344         goto done;
4345     }
4346     else if (type == &PyPickleBuffer_Type) {
4347         status = save_picklebuffer(self, obj);
4348         goto done;
4349     }
4350 
4351     /* Now, check reducer_override.  If it returns NotImplemented,
4352      * fallback to save_type or save_global, and then perhaps to the
4353      * regular reduction mechanism.
4354      */
4355     if (self->reducer_override != NULL) {
4356         reduce_value = PyObject_CallFunctionObjArgs(self->reducer_override,
4357                                                     obj, NULL);
4358         if (reduce_value == NULL) {
4359             goto error;
4360         }
4361         if (reduce_value != Py_NotImplemented) {
4362             goto reduce;
4363         }
4364         Py_DECREF(reduce_value);
4365         reduce_value = NULL;
4366     }
4367 
4368     if (type == &PyType_Type) {
4369         status = save_type(self, obj);
4370         goto done;
4371     }
4372     else if (type == &PyFunction_Type) {
4373         status = save_global(self, obj, NULL);
4374         goto done;
4375     }
4376 
4377     /* XXX: This part needs some unit tests. */
4378 
4379     /* Get a reduction callable, and call it.  This may come from
4380      * self.dispatch_table, copyreg.dispatch_table, the object's
4381      * __reduce_ex__ method, or the object's __reduce__ method.
4382      */
4383     if (self->dispatch_table == NULL) {
4384         PickleState *st = _Pickle_GetGlobalState();
4385         reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4386                                               (PyObject *)type);
4387         if (reduce_func == NULL) {
4388             if (PyErr_Occurred()) {
4389                 goto error;
4390             }
4391         } else {
4392             /* PyDict_GetItemWithError() returns a borrowed reference.
4393                Increase the reference count to be consistent with
4394                PyObject_GetItem and _PyObject_GetAttrId used below. */
4395             Py_INCREF(reduce_func);
4396         }
4397     } else {
4398         reduce_func = PyObject_GetItem(self->dispatch_table,
4399                                        (PyObject *)type);
4400         if (reduce_func == NULL) {
4401             if (PyErr_ExceptionMatches(PyExc_KeyError))
4402                 PyErr_Clear();
4403             else
4404                 goto error;
4405         }
4406     }
4407     if (reduce_func != NULL) {
4408         Py_INCREF(obj);
4409         reduce_value = _Pickle_FastCall(reduce_func, obj);
4410     }
4411     else if (PyType_IsSubtype(type, &PyType_Type)) {
4412         status = save_global(self, obj, NULL);
4413         goto done;
4414     }
4415     else {
4416         _Py_IDENTIFIER(__reduce__);
4417         _Py_IDENTIFIER(__reduce_ex__);
4418 
4419         /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4420            automatically defined as __reduce__. While this is convenient, this
4421            make it impossible to know which method was actually called. Of
4422            course, this is not a big deal. But still, it would be nice to let
4423            the user know which method was called when something go
4424            wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4425            don't actually have to check for a __reduce__ method. */
4426 
4427         /* Check for a __reduce_ex__ method. */
4428         if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4429             goto error;
4430         }
4431         if (reduce_func != NULL) {
4432             PyObject *proto;
4433             proto = PyLong_FromLong(self->proto);
4434             if (proto != NULL) {
4435                 reduce_value = _Pickle_FastCall(reduce_func, proto);
4436             }
4437         }
4438         else {
4439             /* Check for a __reduce__ method. */
4440             if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4441                 goto error;
4442             }
4443             if (reduce_func != NULL) {
4444                 reduce_value = _PyObject_CallNoArg(reduce_func);
4445             }
4446             else {
4447                 PickleState *st = _Pickle_GetGlobalState();
4448                 PyErr_Format(st->PicklingError,
4449                              "can't pickle '%.200s' object: %R",
4450                              type->tp_name, obj);
4451                 goto error;
4452             }
4453         }
4454     }
4455 
4456     if (reduce_value == NULL)
4457         goto error;
4458 
4459   reduce:
4460     if (PyUnicode_Check(reduce_value)) {
4461         status = save_global(self, obj, reduce_value);
4462         goto done;
4463     }
4464 
4465     if (!PyTuple_Check(reduce_value)) {
4466         PickleState *st = _Pickle_GetGlobalState();
4467         PyErr_SetString(st->PicklingError,
4468                         "__reduce__ must return a string or tuple");
4469         goto error;
4470     }
4471 
4472     status = save_reduce(self, reduce_value, obj);
4473 
4474     if (0) {
4475   error:
4476         status = -1;
4477     }
4478   done:
4479 
4480     Py_LeaveRecursiveCall();
4481     Py_XDECREF(reduce_func);
4482     Py_XDECREF(reduce_value);
4483 
4484     return status;
4485 }
4486 
4487 static int
dump(PicklerObject * self,PyObject * obj)4488 dump(PicklerObject *self, PyObject *obj)
4489 {
4490     const char stop_op = STOP;
4491     int status = -1;
4492     PyObject *tmp;
4493     _Py_IDENTIFIER(reducer_override);
4494 
4495     if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4496                                &tmp) < 0) {
4497       goto error;
4498     }
4499     /* Cache the reducer_override method, if it exists. */
4500     if (tmp != NULL) {
4501         Py_XSETREF(self->reducer_override, tmp);
4502     }
4503     else {
4504         Py_CLEAR(self->reducer_override);
4505     }
4506 
4507     if (self->proto >= 2) {
4508         char header[2];
4509 
4510         header[0] = PROTO;
4511         assert(self->proto >= 0 && self->proto < 256);
4512         header[1] = (unsigned char)self->proto;
4513         if (_Pickler_Write(self, header, 2) < 0)
4514             goto error;
4515         if (self->proto >= 4)
4516             self->framing = 1;
4517     }
4518 
4519     if (save(self, obj, 0) < 0 ||
4520         _Pickler_Write(self, &stop_op, 1) < 0 ||
4521         _Pickler_CommitFrame(self) < 0)
4522         goto error;
4523 
4524     // Success
4525     status = 0;
4526 
4527   error:
4528     self->framing = 0;
4529 
4530     /* Break the reference cycle we generated at the beginning this function
4531      * call when setting the reducer_override attribute of the Pickler instance
4532      * to a bound method of the same instance. This is important as the Pickler
4533      * instance holds a reference to each object it has pickled (through its
4534      * memo): thus, these objects wont be garbage-collected as long as the
4535      * Pickler itself is not collected. */
4536     Py_CLEAR(self->reducer_override);
4537     return status;
4538 }
4539 
4540 /*[clinic input]
4541 
4542 _pickle.Pickler.clear_memo
4543 
4544 Clears the pickler's "memo".
4545 
4546 The memo is the data structure that remembers which objects the
4547 pickler has already seen, so that shared or recursive objects are
4548 pickled by reference and not by value.  This method is useful when
4549 re-using picklers.
4550 [clinic start generated code]*/
4551 
4552 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4553 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4554 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4555 {
4556     if (self->memo)
4557         PyMemoTable_Clear(self->memo);
4558 
4559     Py_RETURN_NONE;
4560 }
4561 
4562 /*[clinic input]
4563 
4564 _pickle.Pickler.dump
4565 
4566   obj: object
4567   /
4568 
4569 Write a pickled representation of the given object to the open file.
4570 [clinic start generated code]*/
4571 
4572 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4573 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4574 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4575 {
4576     /* Check whether the Pickler was initialized correctly (issue3664).
4577        Developers often forget to call __init__() in their subclasses, which
4578        would trigger a segfault without this check. */
4579     if (self->write == NULL) {
4580         PickleState *st = _Pickle_GetGlobalState();
4581         PyErr_Format(st->PicklingError,
4582                      "Pickler.__init__() was not called by %s.__init__()",
4583                      Py_TYPE(self)->tp_name);
4584         return NULL;
4585     }
4586 
4587     if (_Pickler_ClearBuffer(self) < 0)
4588         return NULL;
4589 
4590     if (dump(self, obj) < 0)
4591         return NULL;
4592 
4593     if (_Pickler_FlushToFile(self) < 0)
4594         return NULL;
4595 
4596     Py_RETURN_NONE;
4597 }
4598 
4599 /*[clinic input]
4600 
4601 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4602 
4603 Returns size in memory, in bytes.
4604 [clinic start generated code]*/
4605 
4606 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4607 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4608 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4609 {
4610     Py_ssize_t res, s;
4611 
4612     res = _PyObject_SIZE(Py_TYPE(self));
4613     if (self->memo != NULL) {
4614         res += sizeof(PyMemoTable);
4615         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4616     }
4617     if (self->output_buffer != NULL) {
4618         s = _PySys_GetSizeOf(self->output_buffer);
4619         if (s == -1)
4620             return -1;
4621         res += s;
4622     }
4623     return res;
4624 }
4625 
/* Method table installed as tp_methods of Pickler_Type.  Each *_METHODDEF
 * macro is expected to expand to one complete table entry (presumably
 * provided by the Argument Clinic generated header -- TODO confirm). */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4632 
/* tp_dealloc slot of Pickler_Type: release everything the pickler owns and
 * then free the object itself. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Take the object out of GC tracking before its fields are torn down. */
    PyObject_GC_UnTrack(self);

    /* Drop the references held in the object's fields (any may be NULL). */
    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);
    Py_XDECREF(self->reducer_override);
    Py_XDECREF(self->buffer_callback);

    /* The memo table is released through its own destructor function. */
    PyMemoTable_Del(self->memo);

    /* Hand the memory back through the type's tp_free slot. */
    Py_TYPE(self)->tp_free((PyObject *)self);
}
4650 
4651 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4652 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4653 {
4654     Py_VISIT(self->write);
4655     Py_VISIT(self->pers_func);
4656     Py_VISIT(self->dispatch_table);
4657     Py_VISIT(self->fast_memo);
4658     Py_VISIT(self->reducer_override);
4659     Py_VISIT(self->buffer_callback);
4660     return 0;
4661 }
4662 
4663 static int
Pickler_clear(PicklerObject * self)4664 Pickler_clear(PicklerObject *self)
4665 {
4666     Py_CLEAR(self->output_buffer);
4667     Py_CLEAR(self->write);
4668     Py_CLEAR(self->pers_func);
4669     Py_CLEAR(self->dispatch_table);
4670     Py_CLEAR(self->fast_memo);
4671     Py_CLEAR(self->reducer_override);
4672     Py_CLEAR(self->buffer_callback);
4673 
4674     if (self->memo != NULL) {
4675         PyMemoTable *memo = self->memo;
4676         self->memo = NULL;
4677         PyMemoTable_Del(memo);
4678     }
4679     return 0;
4680 }
4681 
4682 
4683 /*[clinic input]
4684 
4685 _pickle.Pickler.__init__
4686 
4687   file: object
4688   protocol: object = None
4689   fix_imports: bool = True
4690   buffer_callback: object = None
4691 
4692 This takes a binary file for writing a pickle data stream.
4693 
4694 The optional *protocol* argument tells the pickler to use the given
4695 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
4696 protocol is 4. It was introduced in Python 3.4, and is incompatible
4697 with previous versions.
4698 
4699 Specifying a negative protocol version selects the highest protocol
4700 version supported.  The higher the protocol used, the more recent the
4701 version of Python needed to read the pickle produced.
4702 
4703 The *file* argument must have a write() method that accepts a single
4704 bytes argument. It can thus be a file object opened for binary
4705 writing, an io.BytesIO instance, or any other custom object that meets
4706 this interface.
4707 
4708 If *fix_imports* is True and protocol is less than 3, pickle will try
4709 to map the new Python 3 names to the old module names used in Python
4710 2, so that the pickle data stream is readable with Python 2.
4711 
4712 If *buffer_callback* is None (the default), buffer views are
4713 serialized into *file* as part of the pickle stream.
4714 
4715 If *buffer_callback* is not None, then it can be called any number
4716 of times with a buffer view.  If the callback returns a false value
4717 (such as None), the given buffer is out-of-band; otherwise the
4718 buffer is serialized in-band, i.e. inside the pickle stream.
4719 
4720 It is an error if *buffer_callback* is not None and *protocol*
4721 is None or smaller than 5.
4722 
4723 [clinic start generated code]*/
4724 
4725 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4726 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4727                               PyObject *protocol, int fix_imports,
4728                               PyObject *buffer_callback)
4729 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4730 {
4731     _Py_IDENTIFIER(persistent_id);
4732     _Py_IDENTIFIER(dispatch_table);
4733 
4734     /* In case of multiple __init__() calls, clear previous content. */
4735     if (self->write != NULL)
4736         (void)Pickler_clear(self);
4737 
4738     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4739         return -1;
4740 
4741     if (_Pickler_SetOutputStream(self, file) < 0)
4742         return -1;
4743 
4744     if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4745         return -1;
4746 
4747     /* memo and output_buffer may have already been created in _Pickler_New */
4748     if (self->memo == NULL) {
4749         self->memo = PyMemoTable_New();
4750         if (self->memo == NULL)
4751             return -1;
4752     }
4753     self->output_len = 0;
4754     if (self->output_buffer == NULL) {
4755         self->max_output_len = WRITE_BUF_SIZE;
4756         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4757                                                         self->max_output_len);
4758         if (self->output_buffer == NULL)
4759             return -1;
4760     }
4761 
4762     self->fast = 0;
4763     self->fast_nesting = 0;
4764     self->fast_memo = NULL;
4765 
4766     if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4767                         &self->pers_func, &self->pers_func_self) < 0)
4768     {
4769         return -1;
4770     }
4771 
4772     if (_PyObject_LookupAttrId((PyObject *)self,
4773                                     &PyId_dispatch_table, &self->dispatch_table) < 0) {
4774         return -1;
4775     }
4776 
4777     return 0;
4778 }
4779 
4780 
4781 /* Define a proxy object for the Pickler's internal memo object. This is to
4782  * avoid breaking code like:
4783  *  pickler.memo.clear()
4784  * and
4785  *  pickler.memo = saved_memo
4786  * Is this a good idea? Not really, but we don't want to break code that uses
4787  * it. Note that we don't implement the entire mapping API here. This is
4788  * intentional, as these should be treated as black-box implementation details.
4789  */
4790 
4791 /*[clinic input]
4792 _pickle.PicklerMemoProxy.clear
4793 
4794 Remove all items from memo.
4795 [clinic start generated code]*/
4796 
4797 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4798 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4799 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4800 {
4801     if (self->pickler->memo)
4802         PyMemoTable_Clear(self->pickler->memo);
4803     Py_RETURN_NONE;
4804 }
4805 
4806 /*[clinic input]
4807 _pickle.PicklerMemoProxy.copy
4808 
4809 Copy the memo to a new object.
4810 [clinic start generated code]*/
4811 
4812 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4813 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4814 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4815 {
4816     PyMemoTable *memo;
4817     PyObject *new_memo = PyDict_New();
4818     if (new_memo == NULL)
4819         return NULL;
4820 
4821     memo = self->pickler->memo;
4822     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4823         PyMemoEntry entry = memo->mt_table[i];
4824         if (entry.me_key != NULL) {
4825             int status;
4826             PyObject *key, *value;
4827 
4828             key = PyLong_FromVoidPtr(entry.me_key);
4829             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4830 
4831             if (key == NULL || value == NULL) {
4832                 Py_XDECREF(key);
4833                 Py_XDECREF(value);
4834                 goto error;
4835             }
4836             status = PyDict_SetItem(new_memo, key, value);
4837             Py_DECREF(key);
4838             Py_DECREF(value);
4839             if (status < 0)
4840                 goto error;
4841         }
4842     }
4843     return new_memo;
4844 
4845   error:
4846     Py_XDECREF(new_memo);
4847     return NULL;
4848 }
4849 
4850 /*[clinic input]
4851 _pickle.PicklerMemoProxy.__reduce__
4852 
4853 Implement pickle support.
4854 [clinic start generated code]*/
4855 
4856 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4857 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4858 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4859 {
4860     PyObject *reduce_value, *dict_args;
4861     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4862     if (contents == NULL)
4863         return NULL;
4864 
4865     reduce_value = PyTuple_New(2);
4866     if (reduce_value == NULL) {
4867         Py_DECREF(contents);
4868         return NULL;
4869     }
4870     dict_args = PyTuple_New(1);
4871     if (dict_args == NULL) {
4872         Py_DECREF(contents);
4873         Py_DECREF(reduce_value);
4874         return NULL;
4875     }
4876     PyTuple_SET_ITEM(dict_args, 0, contents);
4877     Py_INCREF((PyObject *)&PyDict_Type);
4878     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4879     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4880     return reduce_value;
4881 }
4882 
/* Method table installed as tp_methods of PicklerMemoProxyType.  Each
 * *_METHODDEF macro is expected to expand to one complete table entry
 * (presumably provided by the Argument Clinic generated header -- TODO
 * confirm). */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4889 
/* tp_dealloc slot of PicklerMemoProxyType: drop the reference to the wrapped
 * pickler and free the proxy. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    /* Take the proxy out of GC tracking before touching its field. */
    PyObject_GC_UnTrack(self);
    /* Release the reference taken in PicklerMemoProxy_New() (may be NULL,
     * e.g. after PicklerMemoProxy_clear()). */
    Py_XDECREF(self->pickler);
    /* Matches the PyObject_GC_New() allocation. */
    PyObject_GC_Del((PyObject *)self);
}
4897 
4898 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4899 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4900                           visitproc visit, void *arg)
4901 {
4902     Py_VISIT(self->pickler);
4903     return 0;
4904 }
4905 
4906 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4907 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4908 {
4909     Py_CLEAR(self->pickler);
4910     return 0;
4911 }
4912 
4913 static PyTypeObject PicklerMemoProxyType = {
4914     PyVarObject_HEAD_INIT(NULL, 0)
4915     "_pickle.PicklerMemoProxy",                 /*tp_name*/
4916     sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
4917     0,
4918     (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
4919     0,                                          /* tp_vectorcall_offset */
4920     0,                                          /* tp_getattr */
4921     0,                                          /* tp_setattr */
4922     0,                                          /* tp_as_async */
4923     0,                                          /* tp_repr */
4924     0,                                          /* tp_as_number */
4925     0,                                          /* tp_as_sequence */
4926     0,                                          /* tp_as_mapping */
4927     PyObject_HashNotImplemented,                /* tp_hash */
4928     0,                                          /* tp_call */
4929     0,                                          /* tp_str */
4930     PyObject_GenericGetAttr,                    /* tp_getattro */
4931     PyObject_GenericSetAttr,                    /* tp_setattro */
4932     0,                                          /* tp_as_buffer */
4933     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4934     0,                                          /* tp_doc */
4935     (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
4936     (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
4937     0,                                          /* tp_richcompare */
4938     0,                                          /* tp_weaklistoffset */
4939     0,                                          /* tp_iter */
4940     0,                                          /* tp_iternext */
4941     picklerproxy_methods,                       /* tp_methods */
4942 };
4943 
4944 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4945 PicklerMemoProxy_New(PicklerObject *pickler)
4946 {
4947     PicklerMemoProxyObject *self;
4948 
4949     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4950     if (self == NULL)
4951         return NULL;
4952     Py_INCREF(pickler);
4953     self->pickler = pickler;
4954     PyObject_GC_Track(self);
4955     return (PyObject *)self;
4956 }
4957 
4958 /*****************************************************************************/
4959 
4960 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4961 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4962 {
4963     return PicklerMemoProxy_New(self);
4964 }
4965 
4966 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4967 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4968 {
4969     PyMemoTable *new_memo = NULL;
4970 
4971     if (obj == NULL) {
4972         PyErr_SetString(PyExc_TypeError,
4973                         "attribute deletion is not supported");
4974         return -1;
4975     }
4976 
4977     if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4978         PicklerObject *pickler =
4979             ((PicklerMemoProxyObject *)obj)->pickler;
4980 
4981         new_memo = PyMemoTable_Copy(pickler->memo);
4982         if (new_memo == NULL)
4983             return -1;
4984     }
4985     else if (PyDict_Check(obj)) {
4986         Py_ssize_t i = 0;
4987         PyObject *key, *value;
4988 
4989         new_memo = PyMemoTable_New();
4990         if (new_memo == NULL)
4991             return -1;
4992 
4993         while (PyDict_Next(obj, &i, &key, &value)) {
4994             Py_ssize_t memo_id;
4995             PyObject *memo_obj;
4996 
4997             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4998                 PyErr_SetString(PyExc_TypeError,
4999                                 "'memo' values must be 2-item tuples");
5000                 goto error;
5001             }
5002             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5003             if (memo_id == -1 && PyErr_Occurred())
5004                 goto error;
5005             memo_obj = PyTuple_GET_ITEM(value, 1);
5006             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5007                 goto error;
5008         }
5009     }
5010     else {
5011         PyErr_Format(PyExc_TypeError,
5012                      "'memo' attribute must be a PicklerMemoProxy object "
5013                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5014         return -1;
5015     }
5016 
5017     PyMemoTable_Del(self->memo);
5018     self->memo = new_memo;
5019 
5020     return 0;
5021 
5022   error:
5023     if (new_memo)
5024         PyMemoTable_Del(new_memo);
5025     return -1;
5026 }
5027 
5028 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5029 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5030 {
5031     if (self->pers_func == NULL) {
5032         PyErr_SetString(PyExc_AttributeError, "persistent_id");
5033         return NULL;
5034     }
5035     return reconstruct_method(self->pers_func, self->pers_func_self);
5036 }
5037 
5038 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5039 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5040 {
5041     if (value == NULL) {
5042         PyErr_SetString(PyExc_TypeError,
5043                         "attribute deletion is not supported");
5044         return -1;
5045     }
5046     if (!PyCallable_Check(value)) {
5047         PyErr_SetString(PyExc_TypeError,
5048                         "persistent_id must be a callable taking one argument");
5049         return -1;
5050     }
5051 
5052     self->pers_func_self = NULL;
5053     Py_INCREF(value);
5054     Py_XSETREF(self->pers_func, value);
5055 
5056     return 0;
5057 }
5058 
5059 static PyMemberDef Pickler_members[] = {
5060     {"bin", T_INT, offsetof(PicklerObject, bin)},
5061     {"fast", T_INT, offsetof(PicklerObject, fast)},
5062     {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5063     {NULL}
5064 };
5065 
5066 static PyGetSetDef Pickler_getsets[] = {
5067     {"memo",          (getter)Pickler_get_memo,
5068                       (setter)Pickler_set_memo},
5069     {"persistent_id", (getter)Pickler_get_persid,
5070                       (setter)Pickler_set_persid},
5071     {NULL}
5072 };
5073 
5074 static PyTypeObject Pickler_Type = {
5075     PyVarObject_HEAD_INIT(NULL, 0)
5076     "_pickle.Pickler"  ,                /*tp_name*/
5077     sizeof(PicklerObject),              /*tp_basicsize*/
5078     0,                                  /*tp_itemsize*/
5079     (destructor)Pickler_dealloc,        /*tp_dealloc*/
5080     0,                                  /*tp_vectorcall_offset*/
5081     0,                                  /*tp_getattr*/
5082     0,                                  /*tp_setattr*/
5083     0,                                  /*tp_as_async*/
5084     0,                                  /*tp_repr*/
5085     0,                                  /*tp_as_number*/
5086     0,                                  /*tp_as_sequence*/
5087     0,                                  /*tp_as_mapping*/
5088     0,                                  /*tp_hash*/
5089     0,                                  /*tp_call*/
5090     0,                                  /*tp_str*/
5091     0,                                  /*tp_getattro*/
5092     0,                                  /*tp_setattro*/
5093     0,                                  /*tp_as_buffer*/
5094     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5095     _pickle_Pickler___init____doc__,    /*tp_doc*/
5096     (traverseproc)Pickler_traverse,     /*tp_traverse*/
5097     (inquiry)Pickler_clear,             /*tp_clear*/
5098     0,                                  /*tp_richcompare*/
5099     0,                                  /*tp_weaklistoffset*/
5100     0,                                  /*tp_iter*/
5101     0,                                  /*tp_iternext*/
5102     Pickler_methods,                    /*tp_methods*/
5103     Pickler_members,                    /*tp_members*/
5104     Pickler_getsets,                    /*tp_getset*/
5105     0,                                  /*tp_base*/
5106     0,                                  /*tp_dict*/
5107     0,                                  /*tp_descr_get*/
5108     0,                                  /*tp_descr_set*/
5109     0,                                  /*tp_dictoffset*/
5110     _pickle_Pickler___init__,           /*tp_init*/
5111     PyType_GenericAlloc,                /*tp_alloc*/
5112     PyType_GenericNew,                  /*tp_new*/
5113     PyObject_GC_Del,                    /*tp_free*/
5114     0,                                  /*tp_is_gc*/
5115 };
5116 
5117 /* Temporary helper for calling self.find_class().
5118 
5119    XXX: It would be nice to able to avoid Python function call overhead, by
5120    using directly the C version of find_class(), when find_class() is not
5121    overridden by a subclass. Although, this could become rather hackish. A
5122    simpler optimization would be to call the C function when self is not a
5123    subclass instance. */
5124 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)5125 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5126 {
5127     _Py_IDENTIFIER(find_class);
5128 
5129     return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5130                                          module_name, global_name, NULL);
5131 }
5132 
5133 static Py_ssize_t
marker(UnpicklerObject * self)5134 marker(UnpicklerObject *self)
5135 {
5136     Py_ssize_t mark;
5137 
5138     if (self->num_marks < 1) {
5139         PickleState *st = _Pickle_GetGlobalState();
5140         PyErr_SetString(st->UnpicklingError, "could not find MARK");
5141         return -1;
5142     }
5143 
5144     mark = self->marks[--self->num_marks];
5145     self->stack->mark_set = self->num_marks != 0;
5146     self->stack->fence = self->num_marks ?
5147             self->marks[self->num_marks - 1] : 0;
5148     return mark;
5149 }
5150 
/* Opcode handler that puts None on the unpickler's stack.  Returns 0 when
 * execution reaches the end of the function. */
static int
load_none(UnpicklerObject *self)
{
    /* NOTE(review): PDATA_APPEND is a macro defined elsewhere in this file;
     * the -1 argument is presumably the value it returns from this function
     * on failure -- confirm against the macro definition. */
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5157 
5158 static int
load_int(UnpicklerObject * self)5159 load_int(UnpicklerObject *self)
5160 {
5161     PyObject *value;
5162     char *endptr, *s;
5163     Py_ssize_t len;
5164     long x;
5165 
5166     if ((len = _Unpickler_Readline(self, &s)) < 0)
5167         return -1;
5168     if (len < 2)
5169         return bad_readline();
5170 
5171     errno = 0;
5172     /* XXX: Should the base argument of strtol() be explicitly set to 10?
5173        XXX(avassalotti): Should this uses PyOS_strtol()? */
5174     x = strtol(s, &endptr, 0);
5175 
5176     if (errno || (*endptr != '\n' && *endptr != '\0')) {
5177         /* Hm, maybe we've got something long.  Let's try reading
5178          * it as a Python int object. */
5179         errno = 0;
5180         /* XXX: Same thing about the base here. */
5181         value = PyLong_FromString(s, NULL, 0);
5182         if (value == NULL) {
5183             PyErr_SetString(PyExc_ValueError,
5184                             "could not convert string to int");
5185             return -1;
5186         }
5187     }
5188     else {
5189         if (len == 3 && (x == 0 || x == 1)) {
5190             if ((value = PyBool_FromLong(x)) == NULL)
5191                 return -1;
5192         }
5193         else {
5194             if ((value = PyLong_FromLong(x)) == NULL)
5195                 return -1;
5196         }
5197     }
5198 
5199     PDATA_PUSH(self->stack, value, -1);
5200     return 0;
5201 }
5202 
/* Opcode handler that puts the given boolean singleton on the unpickler's
 * stack.  Returns 0 when execution reaches the end of the function. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    /* Callers must pass one of the two bool singletons. */
    assert(boolean == Py_True || boolean == Py_False);
    /* NOTE(review): PDATA_APPEND is a macro defined elsewhere in this file;
     * the -1 argument is presumably the value it returns from this function
     * on failure -- confirm against the macro definition. */
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5210 
5211 /* s contains x bytes of an unsigned little-endian integer.  Return its value
5212  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5213  */
5214 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5215 calc_binsize(char *bytes, int nbytes)
5216 {
5217     unsigned char *s = (unsigned char *)bytes;
5218     int i;
5219     size_t x = 0;
5220 
5221     if (nbytes > (int)sizeof(size_t)) {
5222         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
5223          * have 64-bit size that can't be represented on 32-bit platform.
5224          */
5225         for (i = (int)sizeof(size_t); i < nbytes; i++) {
5226             if (s[i])
5227                 return -1;
5228         }
5229         nbytes = (int)sizeof(size_t);
5230     }
5231     for (i = 0; i < nbytes; i++) {
5232         x |= (size_t) s[i] << (8 * i);
5233     }
5234 
5235     if (x > PY_SSIZE_T_MAX)
5236         return -1;
5237     else
5238         return (Py_ssize_t) x;
5239 }
5240 
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The value is accumulated in an unsigned long so that no shift ever touches
 * the sign bit of a signed type; shifting a byte >= 0x80 into bit 31 of a
 * 32-bit long would be undefined behaviour.  nbytes must not exceed
 * sizeof(long); the callers visible in this file pass 1, 2 or 4.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    assert(nbytes >= 0 && nbytes <= (int)sizeof(long));

    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the result is ux - 2**32, computed
     * here as (low 31 bits) - 2**31 so that every intermediate value is
     * representable in a long of any width (32 bits or more).
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return (long)(ux & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1L;
    }

    return (long)ux;
}
5267 
5268 static int
load_binintx(UnpicklerObject * self,char * s,int size)5269 load_binintx(UnpicklerObject *self, char *s, int size)
5270 {
5271     PyObject *value;
5272     long x;
5273 
5274     x = calc_binint(s, size);
5275 
5276     if ((value = PyLong_FromLong(x)) == NULL)
5277         return -1;
5278 
5279     PDATA_PUSH(self->stack, value, -1);
5280     return 0;
5281 }
5282 
5283 static int
load_binint(UnpicklerObject * self)5284 load_binint(UnpicklerObject *self)
5285 {
5286     char *s;
5287 
5288     if (_Unpickler_Read(self, &s, 4) < 0)
5289         return -1;
5290 
5291     return load_binintx(self, s, 4);
5292 }
5293 
5294 static int
load_binint1(UnpicklerObject * self)5295 load_binint1(UnpicklerObject *self)
5296 {
5297     char *s;
5298 
5299     if (_Unpickler_Read(self, &s, 1) < 0)
5300         return -1;
5301 
5302     return load_binintx(self, s, 1);
5303 }
5304 
5305 static int
load_binint2(UnpicklerObject * self)5306 load_binint2(UnpicklerObject *self)
5307 {
5308     char *s;
5309 
5310     if (_Unpickler_Read(self, &s, 2) < 0)
5311         return -1;
5312 
5313     return load_binintx(self, s, 2);
5314 }
5315 
5316 static int
load_long(UnpicklerObject * self)5317 load_long(UnpicklerObject *self)
5318 {
5319     PyObject *value;
5320     char *s = NULL;
5321     Py_ssize_t len;
5322 
5323     if ((len = _Unpickler_Readline(self, &s)) < 0)
5324         return -1;
5325     if (len < 2)
5326         return bad_readline();
5327 
5328     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5329        the 'L' before calling PyLong_FromString.  In order to maintain
5330        compatibility with Python 3.0.0, we don't actually *require*
5331        the 'L' to be present. */
5332     if (s[len-2] == 'L')
5333         s[len-2] = '\0';
5334     /* XXX: Should the base argument explicitly set to 10? */
5335     value = PyLong_FromString(s, NULL, 0);
5336     if (value == NULL)
5337         return -1;
5338 
5339     PDATA_PUSH(self->stack, value, -1);
5340     return 0;
5341 }
5342 
5343 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5344  * data following.
5345  */
5346 static int
load_counted_long(UnpicklerObject * self,int size)5347 load_counted_long(UnpicklerObject *self, int size)
5348 {
5349     PyObject *value;
5350     char *nbytes;
5351     char *pdata;
5352 
5353     assert(size == 1 || size == 4);
5354     if (_Unpickler_Read(self, &nbytes, size) < 0)
5355         return -1;
5356 
5357     size = calc_binint(nbytes, size);
5358     if (size < 0) {
5359         PickleState *st = _Pickle_GetGlobalState();
5360         /* Corrupt or hostile pickle -- we never write one like this */
5361         PyErr_SetString(st->UnpicklingError,
5362                         "LONG pickle has negative byte count");
5363         return -1;
5364     }
5365 
5366     if (size == 0)
5367         value = PyLong_FromLong(0L);
5368     else {
5369         /* Read the raw little-endian bytes and convert. */
5370         if (_Unpickler_Read(self, &pdata, size) < 0)
5371             return -1;
5372         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5373                                       1 /* little endian */ , 1 /* signed */ );
5374     }
5375     if (value == NULL)
5376         return -1;
5377     PDATA_PUSH(self->stack, value, -1);
5378     return 0;
5379 }
5380 
5381 static int
load_float(UnpicklerObject * self)5382 load_float(UnpicklerObject *self)
5383 {
5384     PyObject *value;
5385     char *endptr, *s;
5386     Py_ssize_t len;
5387     double d;
5388 
5389     if ((len = _Unpickler_Readline(self, &s)) < 0)
5390         return -1;
5391     if (len < 2)
5392         return bad_readline();
5393 
5394     errno = 0;
5395     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5396     if (d == -1.0 && PyErr_Occurred())
5397         return -1;
5398     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5399         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5400         return -1;
5401     }
5402     value = PyFloat_FromDouble(d);
5403     if (value == NULL)
5404         return -1;
5405 
5406     PDATA_PUSH(self->stack, value, -1);
5407     return 0;
5408 }
5409 
5410 static int
load_binfloat(UnpicklerObject * self)5411 load_binfloat(UnpicklerObject *self)
5412 {
5413     PyObject *value;
5414     double x;
5415     char *s;
5416 
5417     if (_Unpickler_Read(self, &s, 8) < 0)
5418         return -1;
5419 
5420     x = _PyFloat_Unpack8((unsigned char *)s, 0);
5421     if (x == -1.0 && PyErr_Occurred())
5422         return -1;
5423 
5424     if ((value = PyFloat_FromDouble(x)) == NULL)
5425         return -1;
5426 
5427     PDATA_PUSH(self->stack, value, -1);
5428     return 0;
5429 }
5430 
5431 static int
load_string(UnpicklerObject * self)5432 load_string(UnpicklerObject *self)
5433 {
5434     PyObject *bytes;
5435     PyObject *obj;
5436     Py_ssize_t len;
5437     char *s, *p;
5438 
5439     if ((len = _Unpickler_Readline(self, &s)) < 0)
5440         return -1;
5441     /* Strip the newline */
5442     len--;
5443     /* Strip outermost quotes */
5444     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5445         p = s + 1;
5446         len -= 2;
5447     }
5448     else {
5449         PickleState *st = _Pickle_GetGlobalState();
5450         PyErr_SetString(st->UnpicklingError,
5451                         "the STRING opcode argument must be quoted");
5452         return -1;
5453     }
5454     assert(len >= 0);
5455 
5456     /* Use the PyBytes API to decode the string, since that is what is used
5457        to encode, and then coerce the result to Unicode. */
5458     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5459     if (bytes == NULL)
5460         return -1;
5461 
5462     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5463        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5464     if (strcmp(self->encoding, "bytes") == 0) {
5465         obj = bytes;
5466     }
5467     else {
5468         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5469         Py_DECREF(bytes);
5470         if (obj == NULL) {
5471             return -1;
5472         }
5473     }
5474 
5475     PDATA_PUSH(self->stack, obj, -1);
5476     return 0;
5477 }
5478 
5479 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5480 load_counted_binstring(UnpicklerObject *self, int nbytes)
5481 {
5482     PyObject *obj;
5483     Py_ssize_t size;
5484     char *s;
5485 
5486     if (_Unpickler_Read(self, &s, nbytes) < 0)
5487         return -1;
5488 
5489     size = calc_binsize(s, nbytes);
5490     if (size < 0) {
5491         PickleState *st = _Pickle_GetGlobalState();
5492         PyErr_Format(st->UnpicklingError,
5493                      "BINSTRING exceeds system's maximum size of %zd bytes",
5494                      PY_SSIZE_T_MAX);
5495         return -1;
5496     }
5497 
5498     if (_Unpickler_Read(self, &s, size) < 0)
5499         return -1;
5500 
5501     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5502        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5503     if (strcmp(self->encoding, "bytes") == 0) {
5504         obj = PyBytes_FromStringAndSize(s, size);
5505     }
5506     else {
5507         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5508     }
5509     if (obj == NULL) {
5510         return -1;
5511     }
5512 
5513     PDATA_PUSH(self->stack, obj, -1);
5514     return 0;
5515 }
5516 
5517 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5518 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5519 {
5520     PyObject *bytes;
5521     Py_ssize_t size;
5522     char *s;
5523 
5524     if (_Unpickler_Read(self, &s, nbytes) < 0)
5525         return -1;
5526 
5527     size = calc_binsize(s, nbytes);
5528     if (size < 0) {
5529         PyErr_Format(PyExc_OverflowError,
5530                      "BINBYTES exceeds system's maximum size of %zd bytes",
5531                      PY_SSIZE_T_MAX);
5532         return -1;
5533     }
5534 
5535     bytes = PyBytes_FromStringAndSize(NULL, size);
5536     if (bytes == NULL)
5537         return -1;
5538     if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5539         Py_DECREF(bytes);
5540         return -1;
5541     }
5542 
5543     PDATA_PUSH(self->stack, bytes, -1);
5544     return 0;
5545 }
5546 
5547 static int
load_counted_bytearray(UnpicklerObject * self)5548 load_counted_bytearray(UnpicklerObject *self)
5549 {
5550     PyObject *bytearray;
5551     Py_ssize_t size;
5552     char *s;
5553 
5554     if (_Unpickler_Read(self, &s, 8) < 0) {
5555         return -1;
5556     }
5557 
5558     size = calc_binsize(s, 8);
5559     if (size < 0) {
5560         PyErr_Format(PyExc_OverflowError,
5561                      "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5562                      PY_SSIZE_T_MAX);
5563         return -1;
5564     }
5565 
5566     bytearray = PyByteArray_FromStringAndSize(NULL, size);
5567     if (bytearray == NULL) {
5568         return -1;
5569     }
5570     if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5571         Py_DECREF(bytearray);
5572         return -1;
5573     }
5574 
5575     PDATA_PUSH(self->stack, bytearray, -1);
5576     return 0;
5577 }
5578 
5579 static int
load_next_buffer(UnpicklerObject * self)5580 load_next_buffer(UnpicklerObject *self)
5581 {
5582     if (self->buffers == NULL) {
5583         PickleState *st = _Pickle_GetGlobalState();
5584         PyErr_SetString(st->UnpicklingError,
5585                         "pickle stream refers to out-of-band data "
5586                         "but no *buffers* argument was given");
5587         return -1;
5588     }
5589     PyObject *buf = PyIter_Next(self->buffers);
5590     if (buf == NULL) {
5591         if (!PyErr_Occurred()) {
5592             PickleState *st = _Pickle_GetGlobalState();
5593             PyErr_SetString(st->UnpicklingError,
5594                             "not enough out-of-band buffers");
5595         }
5596         return -1;
5597     }
5598 
5599     PDATA_PUSH(self->stack, buf, -1);
5600     return 0;
5601 }
5602 
5603 static int
load_readonly_buffer(UnpicklerObject * self)5604 load_readonly_buffer(UnpicklerObject *self)
5605 {
5606     Py_ssize_t len = Py_SIZE(self->stack);
5607     if (len <= self->stack->fence) {
5608         return Pdata_stack_underflow(self->stack);
5609     }
5610 
5611     PyObject *obj = self->stack->data[len - 1];
5612     PyObject *view = PyMemoryView_FromObject(obj);
5613     if (view == NULL) {
5614         return -1;
5615     }
5616     if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5617         /* Original object is writable */
5618         PyMemoryView_GET_BUFFER(view)->readonly = 1;
5619         self->stack->data[len - 1] = view;
5620         Py_DECREF(obj);
5621     }
5622     else {
5623         /* Original object is read-only, no need to replace it */
5624         Py_DECREF(view);
5625     }
5626     return 0;
5627 }
5628 
5629 static int
load_unicode(UnpicklerObject * self)5630 load_unicode(UnpicklerObject *self)
5631 {
5632     PyObject *str;
5633     Py_ssize_t len;
5634     char *s = NULL;
5635 
5636     if ((len = _Unpickler_Readline(self, &s)) < 0)
5637         return -1;
5638     if (len < 1)
5639         return bad_readline();
5640 
5641     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5642     if (str == NULL)
5643         return -1;
5644 
5645     PDATA_PUSH(self->stack, str, -1);
5646     return 0;
5647 }
5648 
5649 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5650 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5651 {
5652     PyObject *str;
5653     Py_ssize_t size;
5654     char *s;
5655 
5656     if (_Unpickler_Read(self, &s, nbytes) < 0)
5657         return -1;
5658 
5659     size = calc_binsize(s, nbytes);
5660     if (size < 0) {
5661         PyErr_Format(PyExc_OverflowError,
5662                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5663                      PY_SSIZE_T_MAX);
5664         return -1;
5665     }
5666 
5667     if (_Unpickler_Read(self, &s, size) < 0)
5668         return -1;
5669 
5670     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5671     if (str == NULL)
5672         return -1;
5673 
5674     PDATA_PUSH(self->stack, str, -1);
5675     return 0;
5676 }
5677 
5678 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5679 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5680 {
5681     PyObject *tuple;
5682 
5683     if (Py_SIZE(self->stack) < len)
5684         return Pdata_stack_underflow(self->stack);
5685 
5686     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5687     if (tuple == NULL)
5688         return -1;
5689     PDATA_PUSH(self->stack, tuple, -1);
5690     return 0;
5691 }
5692 
5693 static int
load_tuple(UnpicklerObject * self)5694 load_tuple(UnpicklerObject *self)
5695 {
5696     Py_ssize_t i;
5697 
5698     if ((i = marker(self)) < 0)
5699         return -1;
5700 
5701     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5702 }
5703 
5704 static int
load_empty_list(UnpicklerObject * self)5705 load_empty_list(UnpicklerObject *self)
5706 {
5707     PyObject *list;
5708 
5709     if ((list = PyList_New(0)) == NULL)
5710         return -1;
5711     PDATA_PUSH(self->stack, list, -1);
5712     return 0;
5713 }
5714 
5715 static int
load_empty_dict(UnpicklerObject * self)5716 load_empty_dict(UnpicklerObject *self)
5717 {
5718     PyObject *dict;
5719 
5720     if ((dict = PyDict_New()) == NULL)
5721         return -1;
5722     PDATA_PUSH(self->stack, dict, -1);
5723     return 0;
5724 }
5725 
5726 static int
load_empty_set(UnpicklerObject * self)5727 load_empty_set(UnpicklerObject *self)
5728 {
5729     PyObject *set;
5730 
5731     if ((set = PySet_New(NULL)) == NULL)
5732         return -1;
5733     PDATA_PUSH(self->stack, set, -1);
5734     return 0;
5735 }
5736 
5737 static int
load_list(UnpicklerObject * self)5738 load_list(UnpicklerObject *self)
5739 {
5740     PyObject *list;
5741     Py_ssize_t i;
5742 
5743     if ((i = marker(self)) < 0)
5744         return -1;
5745 
5746     list = Pdata_poplist(self->stack, i);
5747     if (list == NULL)
5748         return -1;
5749     PDATA_PUSH(self->stack, list, -1);
5750     return 0;
5751 }
5752 
5753 static int
load_dict(UnpicklerObject * self)5754 load_dict(UnpicklerObject *self)
5755 {
5756     PyObject *dict, *key, *value;
5757     Py_ssize_t i, j, k;
5758 
5759     if ((i = marker(self)) < 0)
5760         return -1;
5761     j = Py_SIZE(self->stack);
5762 
5763     if ((dict = PyDict_New()) == NULL)
5764         return -1;
5765 
5766     if ((j - i) % 2 != 0) {
5767         PickleState *st = _Pickle_GetGlobalState();
5768         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5769         Py_DECREF(dict);
5770         return -1;
5771     }
5772 
5773     for (k = i + 1; k < j; k += 2) {
5774         key = self->stack->data[k - 1];
5775         value = self->stack->data[k];
5776         if (PyDict_SetItem(dict, key, value) < 0) {
5777             Py_DECREF(dict);
5778             return -1;
5779         }
5780     }
5781     Pdata_clear(self->stack, i);
5782     PDATA_PUSH(self->stack, dict, -1);
5783     return 0;
5784 }
5785 
5786 static int
load_frozenset(UnpicklerObject * self)5787 load_frozenset(UnpicklerObject *self)
5788 {
5789     PyObject *items;
5790     PyObject *frozenset;
5791     Py_ssize_t i;
5792 
5793     if ((i = marker(self)) < 0)
5794         return -1;
5795 
5796     items = Pdata_poptuple(self->stack, i);
5797     if (items == NULL)
5798         return -1;
5799 
5800     frozenset = PyFrozenSet_New(items);
5801     Py_DECREF(items);
5802     if (frozenset == NULL)
5803         return -1;
5804 
5805     PDATA_PUSH(self->stack, frozenset, -1);
5806     return 0;
5807 }
5808 
5809 static PyObject *
instantiate(PyObject * cls,PyObject * args)5810 instantiate(PyObject *cls, PyObject *args)
5811 {
5812     /* Caller must assure args are a tuple.  Normally, args come from
5813        Pdata_poptuple which packs objects from the top of the stack
5814        into a newly created tuple. */
5815     assert(PyTuple_Check(args));
5816     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5817         _Py_IDENTIFIER(__getinitargs__);
5818         _Py_IDENTIFIER(__new__);
5819         PyObject *func;
5820         if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5821             return NULL;
5822         }
5823         if (func == NULL) {
5824             return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5825         }
5826         Py_DECREF(func);
5827     }
5828     return PyObject_CallObject(cls, args);
5829 }
5830 
5831 static int
load_obj(UnpicklerObject * self)5832 load_obj(UnpicklerObject *self)
5833 {
5834     PyObject *cls, *args, *obj = NULL;
5835     Py_ssize_t i;
5836 
5837     if ((i = marker(self)) < 0)
5838         return -1;
5839 
5840     if (Py_SIZE(self->stack) - i < 1)
5841         return Pdata_stack_underflow(self->stack);
5842 
5843     args = Pdata_poptuple(self->stack, i + 1);
5844     if (args == NULL)
5845         return -1;
5846 
5847     PDATA_POP(self->stack, cls);
5848     if (cls) {
5849         obj = instantiate(cls, args);
5850         Py_DECREF(cls);
5851     }
5852     Py_DECREF(args);
5853     if (obj == NULL)
5854         return -1;
5855 
5856     PDATA_PUSH(self->stack, obj, -1);
5857     return 0;
5858 }
5859 
5860 static int
load_inst(UnpicklerObject * self)5861 load_inst(UnpicklerObject *self)
5862 {
5863     PyObject *cls = NULL;
5864     PyObject *args = NULL;
5865     PyObject *obj = NULL;
5866     PyObject *module_name;
5867     PyObject *class_name;
5868     Py_ssize_t len;
5869     Py_ssize_t i;
5870     char *s;
5871 
5872     if ((i = marker(self)) < 0)
5873         return -1;
5874     if ((len = _Unpickler_Readline(self, &s)) < 0)
5875         return -1;
5876     if (len < 2)
5877         return bad_readline();
5878 
5879     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5880        identifiers are permitted in Python 3.0, since the INST opcode is only
5881        supported by older protocols on Python 2.x. */
5882     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5883     if (module_name == NULL)
5884         return -1;
5885 
5886     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5887         if (len < 2) {
5888             Py_DECREF(module_name);
5889             return bad_readline();
5890         }
5891         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5892         if (class_name != NULL) {
5893             cls = find_class(self, module_name, class_name);
5894             Py_DECREF(class_name);
5895         }
5896     }
5897     Py_DECREF(module_name);
5898 
5899     if (cls == NULL)
5900         return -1;
5901 
5902     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5903         obj = instantiate(cls, args);
5904         Py_DECREF(args);
5905     }
5906     Py_DECREF(cls);
5907 
5908     if (obj == NULL)
5909         return -1;
5910 
5911     PDATA_PUSH(self->stack, obj, -1);
5912     return 0;
5913 }
5914 
5915 static int
load_newobj(UnpicklerObject * self)5916 load_newobj(UnpicklerObject *self)
5917 {
5918     PyObject *args = NULL;
5919     PyObject *clsraw = NULL;
5920     PyTypeObject *cls;          /* clsraw cast to its true type */
5921     PyObject *obj;
5922     PickleState *st = _Pickle_GetGlobalState();
5923 
5924     /* Stack is ... cls argtuple, and we want to call
5925      * cls.__new__(cls, *argtuple).
5926      */
5927     PDATA_POP(self->stack, args);
5928     if (args == NULL)
5929         goto error;
5930     if (!PyTuple_Check(args)) {
5931         PyErr_SetString(st->UnpicklingError,
5932                         "NEWOBJ expected an arg " "tuple.");
5933         goto error;
5934     }
5935 
5936     PDATA_POP(self->stack, clsraw);
5937     cls = (PyTypeObject *)clsraw;
5938     if (cls == NULL)
5939         goto error;
5940     if (!PyType_Check(cls)) {
5941         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5942                         "isn't a type object");
5943         goto error;
5944     }
5945     if (cls->tp_new == NULL) {
5946         PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5947                         "has NULL tp_new");
5948         goto error;
5949     }
5950 
5951     /* Call __new__. */
5952     obj = cls->tp_new(cls, args, NULL);
5953     if (obj == NULL)
5954         goto error;
5955 
5956     Py_DECREF(args);
5957     Py_DECREF(clsraw);
5958     PDATA_PUSH(self->stack, obj, -1);
5959     return 0;
5960 
5961   error:
5962     Py_XDECREF(args);
5963     Py_XDECREF(clsraw);
5964     return -1;
5965 }
5966 
5967 static int
load_newobj_ex(UnpicklerObject * self)5968 load_newobj_ex(UnpicklerObject *self)
5969 {
5970     PyObject *cls, *args, *kwargs;
5971     PyObject *obj;
5972     PickleState *st = _Pickle_GetGlobalState();
5973 
5974     PDATA_POP(self->stack, kwargs);
5975     if (kwargs == NULL) {
5976         return -1;
5977     }
5978     PDATA_POP(self->stack, args);
5979     if (args == NULL) {
5980         Py_DECREF(kwargs);
5981         return -1;
5982     }
5983     PDATA_POP(self->stack, cls);
5984     if (cls == NULL) {
5985         Py_DECREF(kwargs);
5986         Py_DECREF(args);
5987         return -1;
5988     }
5989 
5990     if (!PyType_Check(cls)) {
5991         PyErr_Format(st->UnpicklingError,
5992                      "NEWOBJ_EX class argument must be a type, not %.200s",
5993                      Py_TYPE(cls)->tp_name);
5994         goto error;
5995     }
5996 
5997     if (((PyTypeObject *)cls)->tp_new == NULL) {
5998         PyErr_SetString(st->UnpicklingError,
5999                         "NEWOBJ_EX class argument doesn't have __new__");
6000         goto error;
6001     }
6002     if (!PyTuple_Check(args)) {
6003         PyErr_Format(st->UnpicklingError,
6004                      "NEWOBJ_EX args argument must be a tuple, not %.200s",
6005                      Py_TYPE(args)->tp_name);
6006         goto error;
6007     }
6008     if (!PyDict_Check(kwargs)) {
6009         PyErr_Format(st->UnpicklingError,
6010                      "NEWOBJ_EX kwargs argument must be a dict, not %.200s",
6011                      Py_TYPE(kwargs)->tp_name);
6012         goto error;
6013     }
6014 
6015     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
6016     Py_DECREF(kwargs);
6017     Py_DECREF(args);
6018     Py_DECREF(cls);
6019     if (obj == NULL) {
6020         return -1;
6021     }
6022     PDATA_PUSH(self->stack, obj, -1);
6023     return 0;
6024 
6025 error:
6026     Py_DECREF(kwargs);
6027     Py_DECREF(args);
6028     Py_DECREF(cls);
6029     return -1;
6030 }
6031 
6032 static int
load_global(UnpicklerObject * self)6033 load_global(UnpicklerObject *self)
6034 {
6035     PyObject *global = NULL;
6036     PyObject *module_name;
6037     PyObject *global_name;
6038     Py_ssize_t len;
6039     char *s;
6040 
6041     if ((len = _Unpickler_Readline(self, &s)) < 0)
6042         return -1;
6043     if (len < 2)
6044         return bad_readline();
6045     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6046     if (!module_name)
6047         return -1;
6048 
6049     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6050         if (len < 2) {
6051             Py_DECREF(module_name);
6052             return bad_readline();
6053         }
6054         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6055         if (global_name) {
6056             global = find_class(self, module_name, global_name);
6057             Py_DECREF(global_name);
6058         }
6059     }
6060     Py_DECREF(module_name);
6061 
6062     if (global == NULL)
6063         return -1;
6064     PDATA_PUSH(self->stack, global, -1);
6065     return 0;
6066 }
6067 
6068 static int
load_stack_global(UnpicklerObject * self)6069 load_stack_global(UnpicklerObject *self)
6070 {
6071     PyObject *global;
6072     PyObject *module_name;
6073     PyObject *global_name;
6074 
6075     PDATA_POP(self->stack, global_name);
6076     PDATA_POP(self->stack, module_name);
6077     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6078         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6079         PickleState *st = _Pickle_GetGlobalState();
6080         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6081         Py_XDECREF(global_name);
6082         Py_XDECREF(module_name);
6083         return -1;
6084     }
6085     global = find_class(self, module_name, global_name);
6086     Py_DECREF(global_name);
6087     Py_DECREF(module_name);
6088     if (global == NULL)
6089         return -1;
6090     PDATA_PUSH(self->stack, global, -1);
6091     return 0;
6092 }
6093 
6094 static int
load_persid(UnpicklerObject * self)6095 load_persid(UnpicklerObject *self)
6096 {
6097     PyObject *pid, *obj;
6098     Py_ssize_t len;
6099     char *s;
6100 
6101     if (self->pers_func) {
6102         if ((len = _Unpickler_Readline(self, &s)) < 0)
6103             return -1;
6104         if (len < 1)
6105             return bad_readline();
6106 
6107         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6108         if (pid == NULL) {
6109             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6110                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6111                                 "persistent IDs in protocol 0 must be "
6112                                 "ASCII strings");
6113             }
6114             return -1;
6115         }
6116 
6117         obj = call_method(self->pers_func, self->pers_func_self, pid);
6118         Py_DECREF(pid);
6119         if (obj == NULL)
6120             return -1;
6121 
6122         PDATA_PUSH(self->stack, obj, -1);
6123         return 0;
6124     }
6125     else {
6126         PickleState *st = _Pickle_GetGlobalState();
6127         PyErr_SetString(st->UnpicklingError,
6128                         "A load persistent id instruction was encountered,\n"
6129                         "but no persistent_load function was specified.");
6130         return -1;
6131     }
6132 }
6133 
6134 static int
load_binpersid(UnpicklerObject * self)6135 load_binpersid(UnpicklerObject *self)
6136 {
6137     PyObject *pid, *obj;
6138 
6139     if (self->pers_func) {
6140         PDATA_POP(self->stack, pid);
6141         if (pid == NULL)
6142             return -1;
6143 
6144         obj = call_method(self->pers_func, self->pers_func_self, pid);
6145         Py_DECREF(pid);
6146         if (obj == NULL)
6147             return -1;
6148 
6149         PDATA_PUSH(self->stack, obj, -1);
6150         return 0;
6151     }
6152     else {
6153         PickleState *st = _Pickle_GetGlobalState();
6154         PyErr_SetString(st->UnpicklingError,
6155                         "A load persistent id instruction was encountered,\n"
6156                         "but no persistent_load function was specified.");
6157         return -1;
6158     }
6159 }
6160 
6161 static int
load_pop(UnpicklerObject * self)6162 load_pop(UnpicklerObject *self)
6163 {
6164     Py_ssize_t len = Py_SIZE(self->stack);
6165 
6166     /* Note that we split the (pickle.py) stack into two stacks,
6167      * an object stack and a mark stack. We have to be clever and
6168      * pop the right one. We do this by looking at the top of the
6169      * mark stack first, and only signalling a stack underflow if
6170      * the object stack is empty and the mark stack doesn't match
6171      * our expectations.
6172      */
6173     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6174         self->num_marks--;
6175         self->stack->mark_set = self->num_marks != 0;
6176         self->stack->fence = self->num_marks ?
6177                 self->marks[self->num_marks - 1] : 0;
6178     } else if (len <= self->stack->fence)
6179         return Pdata_stack_underflow(self->stack);
6180     else {
6181         len--;
6182         Py_DECREF(self->stack->data[len]);
6183         Py_SIZE(self->stack) = len;
6184     }
6185     return 0;
6186 }
6187 
6188 static int
load_pop_mark(UnpicklerObject * self)6189 load_pop_mark(UnpicklerObject *self)
6190 {
6191     Py_ssize_t i;
6192 
6193     if ((i = marker(self)) < 0)
6194         return -1;
6195 
6196     Pdata_clear(self->stack, i);
6197 
6198     return 0;
6199 }
6200 
6201 static int
load_dup(UnpicklerObject * self)6202 load_dup(UnpicklerObject *self)
6203 {
6204     PyObject *last;
6205     Py_ssize_t len = Py_SIZE(self->stack);
6206 
6207     if (len <= self->stack->fence)
6208         return Pdata_stack_underflow(self->stack);
6209     last = self->stack->data[len - 1];
6210     PDATA_APPEND(self->stack, last, -1);
6211     return 0;
6212 }
6213 
6214 static int
load_get(UnpicklerObject * self)6215 load_get(UnpicklerObject *self)
6216 {
6217     PyObject *key, *value;
6218     Py_ssize_t idx;
6219     Py_ssize_t len;
6220     char *s;
6221 
6222     if ((len = _Unpickler_Readline(self, &s)) < 0)
6223         return -1;
6224     if (len < 2)
6225         return bad_readline();
6226 
6227     key = PyLong_FromString(s, NULL, 10);
6228     if (key == NULL)
6229         return -1;
6230     idx = PyLong_AsSsize_t(key);
6231     if (idx == -1 && PyErr_Occurred()) {
6232         Py_DECREF(key);
6233         return -1;
6234     }
6235 
6236     value = _Unpickler_MemoGet(self, idx);
6237     if (value == NULL) {
6238         if (!PyErr_Occurred())
6239             PyErr_SetObject(PyExc_KeyError, key);
6240         Py_DECREF(key);
6241         return -1;
6242     }
6243     Py_DECREF(key);
6244 
6245     PDATA_APPEND(self->stack, value, -1);
6246     return 0;
6247 }
6248 
6249 static int
load_binget(UnpicklerObject * self)6250 load_binget(UnpicklerObject *self)
6251 {
6252     PyObject *value;
6253     Py_ssize_t idx;
6254     char *s;
6255 
6256     if (_Unpickler_Read(self, &s, 1) < 0)
6257         return -1;
6258 
6259     idx = Py_CHARMASK(s[0]);
6260 
6261     value = _Unpickler_MemoGet(self, idx);
6262     if (value == NULL) {
6263         PyObject *key = PyLong_FromSsize_t(idx);
6264         if (key != NULL) {
6265             PyErr_SetObject(PyExc_KeyError, key);
6266             Py_DECREF(key);
6267         }
6268         return -1;
6269     }
6270 
6271     PDATA_APPEND(self->stack, value, -1);
6272     return 0;
6273 }
6274 
6275 static int
load_long_binget(UnpicklerObject * self)6276 load_long_binget(UnpicklerObject *self)
6277 {
6278     PyObject *value;
6279     Py_ssize_t idx;
6280     char *s;
6281 
6282     if (_Unpickler_Read(self, &s, 4) < 0)
6283         return -1;
6284 
6285     idx = calc_binsize(s, 4);
6286 
6287     value = _Unpickler_MemoGet(self, idx);
6288     if (value == NULL) {
6289         PyObject *key = PyLong_FromSsize_t(idx);
6290         if (key != NULL) {
6291             PyErr_SetObject(PyExc_KeyError, key);
6292             Py_DECREF(key);
6293         }
6294         return -1;
6295     }
6296 
6297     PDATA_APPEND(self->stack, value, -1);
6298     return 0;
6299 }
6300 
6301 /* Push an object from the extension registry (EXT[124]).  nbytes is
6302  * the number of bytes following the opcode, holding the index (code) value.
6303  */
6304 static int
load_extension(UnpicklerObject * self,int nbytes)6305 load_extension(UnpicklerObject *self, int nbytes)
6306 {
6307     char *codebytes;            /* the nbytes bytes after the opcode */
6308     long code;                  /* calc_binint returns long */
6309     PyObject *py_code;          /* code as a Python int */
6310     PyObject *obj;              /* the object to push */
6311     PyObject *pair;             /* (module_name, class_name) */
6312     PyObject *module_name, *class_name;
6313     PickleState *st = _Pickle_GetGlobalState();
6314 
6315     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6316     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6317         return -1;
6318     code = calc_binint(codebytes, nbytes);
6319     if (code <= 0) {            /* note that 0 is forbidden */
6320         /* Corrupt or hostile pickle. */
6321         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6322         return -1;
6323     }
6324 
6325     /* Look for the code in the cache. */
6326     py_code = PyLong_FromLong(code);
6327     if (py_code == NULL)
6328         return -1;
6329     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6330     if (obj != NULL) {
6331         /* Bingo. */
6332         Py_DECREF(py_code);
6333         PDATA_APPEND(self->stack, obj, -1);
6334         return 0;
6335     }
6336     if (PyErr_Occurred()) {
6337         Py_DECREF(py_code);
6338         return -1;
6339     }
6340 
6341     /* Look up the (module_name, class_name) pair. */
6342     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6343     if (pair == NULL) {
6344         Py_DECREF(py_code);
6345         if (!PyErr_Occurred()) {
6346             PyErr_Format(PyExc_ValueError, "unregistered extension "
6347                          "code %ld", code);
6348         }
6349         return -1;
6350     }
6351     /* Since the extension registry is manipulable via Python code,
6352      * confirm that pair is really a 2-tuple of strings.
6353      */
6354     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6355         goto error;
6356     }
6357 
6358     module_name = PyTuple_GET_ITEM(pair, 0);
6359     if (!PyUnicode_Check(module_name)) {
6360         goto error;
6361     }
6362 
6363     class_name = PyTuple_GET_ITEM(pair, 1);
6364     if (!PyUnicode_Check(class_name)) {
6365         goto error;
6366     }
6367 
6368     /* Load the object. */
6369     obj = find_class(self, module_name, class_name);
6370     if (obj == NULL) {
6371         Py_DECREF(py_code);
6372         return -1;
6373     }
6374     /* Cache code -> obj. */
6375     code = PyDict_SetItem(st->extension_cache, py_code, obj);
6376     Py_DECREF(py_code);
6377     if (code < 0) {
6378         Py_DECREF(obj);
6379         return -1;
6380     }
6381     PDATA_PUSH(self->stack, obj, -1);
6382     return 0;
6383 
6384 error:
6385     Py_DECREF(py_code);
6386     PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6387                  "isn't a 2-tuple of strings", code);
6388     return -1;
6389 }
6390 
6391 static int
load_put(UnpicklerObject * self)6392 load_put(UnpicklerObject *self)
6393 {
6394     PyObject *key, *value;
6395     Py_ssize_t idx;
6396     Py_ssize_t len;
6397     char *s = NULL;
6398 
6399     if ((len = _Unpickler_Readline(self, &s)) < 0)
6400         return -1;
6401     if (len < 2)
6402         return bad_readline();
6403     if (Py_SIZE(self->stack) <= self->stack->fence)
6404         return Pdata_stack_underflow(self->stack);
6405     value = self->stack->data[Py_SIZE(self->stack) - 1];
6406 
6407     key = PyLong_FromString(s, NULL, 10);
6408     if (key == NULL)
6409         return -1;
6410     idx = PyLong_AsSsize_t(key);
6411     Py_DECREF(key);
6412     if (idx < 0) {
6413         if (!PyErr_Occurred())
6414             PyErr_SetString(PyExc_ValueError,
6415                             "negative PUT argument");
6416         return -1;
6417     }
6418 
6419     return _Unpickler_MemoPut(self, idx, value);
6420 }
6421 
6422 static int
load_binput(UnpicklerObject * self)6423 load_binput(UnpicklerObject *self)
6424 {
6425     PyObject *value;
6426     Py_ssize_t idx;
6427     char *s;
6428 
6429     if (_Unpickler_Read(self, &s, 1) < 0)
6430         return -1;
6431 
6432     if (Py_SIZE(self->stack) <= self->stack->fence)
6433         return Pdata_stack_underflow(self->stack);
6434     value = self->stack->data[Py_SIZE(self->stack) - 1];
6435 
6436     idx = Py_CHARMASK(s[0]);
6437 
6438     return _Unpickler_MemoPut(self, idx, value);
6439 }
6440 
6441 static int
load_long_binput(UnpicklerObject * self)6442 load_long_binput(UnpicklerObject *self)
6443 {
6444     PyObject *value;
6445     Py_ssize_t idx;
6446     char *s;
6447 
6448     if (_Unpickler_Read(self, &s, 4) < 0)
6449         return -1;
6450 
6451     if (Py_SIZE(self->stack) <= self->stack->fence)
6452         return Pdata_stack_underflow(self->stack);
6453     value = self->stack->data[Py_SIZE(self->stack) - 1];
6454 
6455     idx = calc_binsize(s, 4);
6456     if (idx < 0) {
6457         PyErr_SetString(PyExc_ValueError,
6458                         "negative LONG_BINPUT argument");
6459         return -1;
6460     }
6461 
6462     return _Unpickler_MemoPut(self, idx, value);
6463 }
6464 
6465 static int
load_memoize(UnpicklerObject * self)6466 load_memoize(UnpicklerObject *self)
6467 {
6468     PyObject *value;
6469 
6470     if (Py_SIZE(self->stack) <= self->stack->fence)
6471         return Pdata_stack_underflow(self->stack);
6472     value = self->stack->data[Py_SIZE(self->stack) - 1];
6473 
6474     return _Unpickler_MemoPut(self, self->memo_len, value);
6475 }
6476 
6477 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6478 do_append(UnpicklerObject *self, Py_ssize_t x)
6479 {
6480     PyObject *value;
6481     PyObject *slice;
6482     PyObject *list;
6483     PyObject *result;
6484     Py_ssize_t len, i;
6485 
6486     len = Py_SIZE(self->stack);
6487     if (x > len || x <= self->stack->fence)
6488         return Pdata_stack_underflow(self->stack);
6489     if (len == x)  /* nothing to do */
6490         return 0;
6491 
6492     list = self->stack->data[x - 1];
6493 
6494     if (PyList_CheckExact(list)) {
6495         Py_ssize_t list_len;
6496         int ret;
6497 
6498         slice = Pdata_poplist(self->stack, x);
6499         if (!slice)
6500             return -1;
6501         list_len = PyList_GET_SIZE(list);
6502         ret = PyList_SetSlice(list, list_len, list_len, slice);
6503         Py_DECREF(slice);
6504         return ret;
6505     }
6506     else {
6507         PyObject *extend_func;
6508         _Py_IDENTIFIER(extend);
6509 
6510         if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6511             return -1;
6512         }
6513         if (extend_func != NULL) {
6514             slice = Pdata_poplist(self->stack, x);
6515             if (!slice) {
6516                 Py_DECREF(extend_func);
6517                 return -1;
6518             }
6519             result = _Pickle_FastCall(extend_func, slice);
6520             Py_DECREF(extend_func);
6521             if (result == NULL)
6522                 return -1;
6523             Py_DECREF(result);
6524         }
6525         else {
6526             PyObject *append_func;
6527             _Py_IDENTIFIER(append);
6528 
6529             /* Even if the PEP 307 requires extend() and append() methods,
6530                fall back on append() if the object has no extend() method
6531                for backward compatibility. */
6532             append_func = _PyObject_GetAttrId(list, &PyId_append);
6533             if (append_func == NULL)
6534                 return -1;
6535             for (i = x; i < len; i++) {
6536                 value = self->stack->data[i];
6537                 result = _Pickle_FastCall(append_func, value);
6538                 if (result == NULL) {
6539                     Pdata_clear(self->stack, i + 1);
6540                     Py_SIZE(self->stack) = x;
6541                     Py_DECREF(append_func);
6542                     return -1;
6543                 }
6544                 Py_DECREF(result);
6545             }
6546             Py_SIZE(self->stack) = x;
6547             Py_DECREF(append_func);
6548         }
6549     }
6550 
6551     return 0;
6552 }
6553 
6554 static int
load_append(UnpicklerObject * self)6555 load_append(UnpicklerObject *self)
6556 {
6557     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6558         return Pdata_stack_underflow(self->stack);
6559     return do_append(self, Py_SIZE(self->stack) - 1);
6560 }
6561 
6562 static int
load_appends(UnpicklerObject * self)6563 load_appends(UnpicklerObject *self)
6564 {
6565     Py_ssize_t i = marker(self);
6566     if (i < 0)
6567         return -1;
6568     return do_append(self, i);
6569 }
6570 
6571 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6572 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6573 {
6574     PyObject *value, *key;
6575     PyObject *dict;
6576     Py_ssize_t len, i;
6577     int status = 0;
6578 
6579     len = Py_SIZE(self->stack);
6580     if (x > len || x <= self->stack->fence)
6581         return Pdata_stack_underflow(self->stack);
6582     if (len == x)  /* nothing to do */
6583         return 0;
6584     if ((len - x) % 2 != 0) {
6585         PickleState *st = _Pickle_GetGlobalState();
6586         /* Currupt or hostile pickle -- we never write one like this. */
6587         PyErr_SetString(st->UnpicklingError,
6588                         "odd number of items for SETITEMS");
6589         return -1;
6590     }
6591 
6592     /* Here, dict does not actually need to be a PyDict; it could be anything
6593        that supports the __setitem__ attribute. */
6594     dict = self->stack->data[x - 1];
6595 
6596     for (i = x + 1; i < len; i += 2) {
6597         key = self->stack->data[i - 1];
6598         value = self->stack->data[i];
6599         if (PyObject_SetItem(dict, key, value) < 0) {
6600             status = -1;
6601             break;
6602         }
6603     }
6604 
6605     Pdata_clear(self->stack, x);
6606     return status;
6607 }
6608 
6609 static int
load_setitem(UnpicklerObject * self)6610 load_setitem(UnpicklerObject *self)
6611 {
6612     return do_setitems(self, Py_SIZE(self->stack) - 2);
6613 }
6614 
6615 static int
load_setitems(UnpicklerObject * self)6616 load_setitems(UnpicklerObject *self)
6617 {
6618     Py_ssize_t i = marker(self);
6619     if (i < 0)
6620         return -1;
6621     return do_setitems(self, i);
6622 }
6623 
6624 static int
load_additems(UnpicklerObject * self)6625 load_additems(UnpicklerObject *self)
6626 {
6627     PyObject *set;
6628     Py_ssize_t mark, len, i;
6629 
6630     mark =  marker(self);
6631     if (mark < 0)
6632         return -1;
6633     len = Py_SIZE(self->stack);
6634     if (mark > len || mark <= self->stack->fence)
6635         return Pdata_stack_underflow(self->stack);
6636     if (len == mark)  /* nothing to do */
6637         return 0;
6638 
6639     set = self->stack->data[mark - 1];
6640 
6641     if (PySet_Check(set)) {
6642         PyObject *items;
6643         int status;
6644 
6645         items = Pdata_poptuple(self->stack, mark);
6646         if (items == NULL)
6647             return -1;
6648 
6649         status = _PySet_Update(set, items);
6650         Py_DECREF(items);
6651         return status;
6652     }
6653     else {
6654         PyObject *add_func;
6655         _Py_IDENTIFIER(add);
6656 
6657         add_func = _PyObject_GetAttrId(set, &PyId_add);
6658         if (add_func == NULL)
6659             return -1;
6660         for (i = mark; i < len; i++) {
6661             PyObject *result;
6662             PyObject *item;
6663 
6664             item = self->stack->data[i];
6665             result = _Pickle_FastCall(add_func, item);
6666             if (result == NULL) {
6667                 Pdata_clear(self->stack, i + 1);
6668                 Py_SIZE(self->stack) = mark;
6669                 return -1;
6670             }
6671             Py_DECREF(result);
6672         }
6673         Py_SIZE(self->stack) = mark;
6674     }
6675 
6676     return 0;
6677 }
6678 
6679 static int
load_build(UnpicklerObject * self)6680 load_build(UnpicklerObject *self)
6681 {
6682     PyObject *state, *inst, *slotstate;
6683     PyObject *setstate;
6684     int status = 0;
6685     _Py_IDENTIFIER(__setstate__);
6686 
6687     /* Stack is ... instance, state.  We want to leave instance at
6688      * the stack top, possibly mutated via instance.__setstate__(state).
6689      */
6690     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6691         return Pdata_stack_underflow(self->stack);
6692 
6693     PDATA_POP(self->stack, state);
6694     if (state == NULL)
6695         return -1;
6696 
6697     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6698 
6699     if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6700         Py_DECREF(state);
6701         return -1;
6702     }
6703     if (setstate != NULL) {
6704         PyObject *result;
6705 
6706         /* The explicit __setstate__ is responsible for everything. */
6707         result = _Pickle_FastCall(setstate, state);
6708         Py_DECREF(setstate);
6709         if (result == NULL)
6710             return -1;
6711         Py_DECREF(result);
6712         return 0;
6713     }
6714 
6715     /* A default __setstate__.  First see whether state embeds a
6716      * slot state dict too (a proto 2 addition).
6717      */
6718     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6719         PyObject *tmp = state;
6720 
6721         state = PyTuple_GET_ITEM(tmp, 0);
6722         slotstate = PyTuple_GET_ITEM(tmp, 1);
6723         Py_INCREF(state);
6724         Py_INCREF(slotstate);
6725         Py_DECREF(tmp);
6726     }
6727     else
6728         slotstate = NULL;
6729 
6730     /* Set inst.__dict__ from the state dict (if any). */
6731     if (state != Py_None) {
6732         PyObject *dict;
6733         PyObject *d_key, *d_value;
6734         Py_ssize_t i;
6735         _Py_IDENTIFIER(__dict__);
6736 
6737         if (!PyDict_Check(state)) {
6738             PickleState *st = _Pickle_GetGlobalState();
6739             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6740             goto error;
6741         }
6742         dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6743         if (dict == NULL)
6744             goto error;
6745 
6746         i = 0;
6747         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6748             /* normally the keys for instance attributes are
6749                interned.  we should try to do that here. */
6750             Py_INCREF(d_key);
6751             if (PyUnicode_CheckExact(d_key))
6752                 PyUnicode_InternInPlace(&d_key);
6753             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6754                 Py_DECREF(d_key);
6755                 goto error;
6756             }
6757             Py_DECREF(d_key);
6758         }
6759         Py_DECREF(dict);
6760     }
6761 
6762     /* Also set instance attributes from the slotstate dict (if any). */
6763     if (slotstate != NULL) {
6764         PyObject *d_key, *d_value;
6765         Py_ssize_t i;
6766 
6767         if (!PyDict_Check(slotstate)) {
6768             PickleState *st = _Pickle_GetGlobalState();
6769             PyErr_SetString(st->UnpicklingError,
6770                             "slot state is not a dictionary");
6771             goto error;
6772         }
6773         i = 0;
6774         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6775             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6776                 goto error;
6777         }
6778     }
6779 
6780     if (0) {
6781   error:
6782         status = -1;
6783     }
6784 
6785     Py_DECREF(state);
6786     Py_XDECREF(slotstate);
6787     return status;
6788 }
6789 
6790 static int
load_mark(UnpicklerObject * self)6791 load_mark(UnpicklerObject *self)
6792 {
6793 
6794     /* Note that we split the (pickle.py) stack into two stacks, an
6795      * object stack and a mark stack. Here we push a mark onto the
6796      * mark stack.
6797      */
6798 
6799     if (self->num_marks >= self->marks_size) {
6800         size_t alloc = ((size_t)self->num_marks << 1) + 20;
6801         Py_ssize_t *marks_new = self->marks;
6802         PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6803         if (marks_new == NULL) {
6804             PyErr_NoMemory();
6805             return -1;
6806         }
6807         self->marks = marks_new;
6808         self->marks_size = (Py_ssize_t)alloc;
6809     }
6810 
6811     self->stack->mark_set = 1;
6812     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6813 
6814     return 0;
6815 }
6816 
6817 static int
load_reduce(UnpicklerObject * self)6818 load_reduce(UnpicklerObject *self)
6819 {
6820     PyObject *callable = NULL;
6821     PyObject *argtup = NULL;
6822     PyObject *obj = NULL;
6823 
6824     PDATA_POP(self->stack, argtup);
6825     if (argtup == NULL)
6826         return -1;
6827     PDATA_POP(self->stack, callable);
6828     if (callable) {
6829         obj = PyObject_CallObject(callable, argtup);
6830         Py_DECREF(callable);
6831     }
6832     Py_DECREF(argtup);
6833 
6834     if (obj == NULL)
6835         return -1;
6836 
6837     PDATA_PUSH(self->stack, obj, -1);
6838     return 0;
6839 }
6840 
6841 /* Just raises an error if we don't know the protocol specified.  PROTO
6842  * is the first opcode for protocols >= 2.
6843  */
6844 static int
load_proto(UnpicklerObject * self)6845 load_proto(UnpicklerObject *self)
6846 {
6847     char *s;
6848     int i;
6849 
6850     if (_Unpickler_Read(self, &s, 1) < 0)
6851         return -1;
6852 
6853     i = (unsigned char)s[0];
6854     if (i <= HIGHEST_PROTOCOL) {
6855         self->proto = i;
6856         return 0;
6857     }
6858 
6859     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6860     return -1;
6861 }
6862 
6863 static int
load_frame(UnpicklerObject * self)6864 load_frame(UnpicklerObject *self)
6865 {
6866     char *s;
6867     Py_ssize_t frame_len;
6868 
6869     if (_Unpickler_Read(self, &s, 8) < 0)
6870         return -1;
6871 
6872     frame_len = calc_binsize(s, 8);
6873     if (frame_len < 0) {
6874         PyErr_Format(PyExc_OverflowError,
6875                      "FRAME length exceeds system's maximum of %zd bytes",
6876                      PY_SSIZE_T_MAX);
6877         return -1;
6878     }
6879 
6880     if (_Unpickler_Read(self, &s, frame_len) < 0)
6881         return -1;
6882 
6883     /* Rewind to start of frame */
6884     self->next_read_idx -= frame_len;
6885     return 0;
6886 }
6887 
/* Main unpickling loop.
 *
 * Resets the per-load state (marks, fence, protocol, object stack), then
 * repeatedly reads one opcode byte and dispatches to the matching load_*
 * handler until STOP is seen or a handler fails.  On success the object left
 * on top of the stack is popped and returned; on failure NULL is returned
 * with an exception set.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    /* Start from a clean slate: no marks, no fence, protocol 0 and an empty
       object stack. */
    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       A failing handler `break`s out of the switch (and then hits the
       unconditional `break` that ends the loop); a successful handler
       `continue`s with the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        /* Fetch the next opcode byte. */
        if (_Unpickler_Read(self, &s, 1) < 0) {
            PickleState *st = _Pickle_GetGlobalState();
            /* An UnpicklingError raised by the read is replaced with
               EOFError; any other exception propagates unchanged. */
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(NEWOBJ_EX, load_newobj_ex)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* End of the pickle: leave the switch and then the loop. */
        case STOP:
            break;

        /* Unknown opcode: report it, printing printable ASCII bytes (other
           than quote and backslash) literally and everything else as a
           \xNN escape. */
        default:
            {
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        /* Reached after STOP or after a handler reported failure. */
        break;                  /* and we are done! */
    }

    /* A handler failure leaves its exception set; STOP does not. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    /* The finished object is whatever is left on top of the stack. */
    PDATA_POP(self->stack, value);
    return value;
}
7018 
7019 /*[clinic input]
7020 
7021 _pickle.Unpickler.load
7022 
7023 Load a pickle.
7024 
7025 Read a pickled object representation from the open file object given
7026 in the constructor, and return the reconstituted object hierarchy
7027 specified therein.
7028 [clinic start generated code]*/
7029 
7030 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)7031 _pickle_Unpickler_load_impl(UnpicklerObject *self)
7032 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
7033 {
7034     UnpicklerObject *unpickler = (UnpicklerObject*)self;
7035 
7036     /* Check whether the Unpickler was initialized correctly. This prevents
7037        segfaulting if a subclass overridden __init__ with a function that does
7038        not call Unpickler.__init__(). Here, we simply ensure that self->read
7039        is not NULL. */
7040     if (unpickler->read == NULL) {
7041         PickleState *st = _Pickle_GetGlobalState();
7042         PyErr_Format(st->UnpicklingError,
7043                      "Unpickler.__init__() was not called by %s.__init__()",
7044                      Py_TYPE(unpickler)->tp_name);
7045         return NULL;
7046     }
7047 
7048     return load(unpickler);
7049 }
7050 
7051 /* The name of find_class() is misleading. In newer pickle protocols, this
7052    function is used for loading any global (i.e., functions), not just
7053    classes. The name is kept only for backward compatibility. */
7054 
7055 /*[clinic input]
7056 
7057 _pickle.Unpickler.find_class
7058 
7059   module_name: object
7060   global_name: object
7061   /
7062 
7063 Return an object from a specified module.
7064 
7065 If necessary, the module will be imported. Subclasses may override
7066 this method (e.g. to restrict unpickling of arbitrary classes and
7067 functions).
7068 
7069 This method is called whenever a class or a function object is
7070 needed.  Both arguments passed are str objects.
7071 [clinic start generated code]*/
7072 
7073 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7074 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7075                                   PyObject *module_name,
7076                                   PyObject *global_name)
7077 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7078 {
7079     PyObject *global;
7080     PyObject *module;
7081 
7082     if (PySys_Audit("pickle.find_class", "OO",
7083                     module_name, global_name) < 0) {
7084         return NULL;
7085     }
7086 
7087     /* Try to map the old names used in Python 2.x to the new ones used in
7088        Python 3.x.  We do this only with old pickle protocols and when the
7089        user has not disabled the feature. */
7090     if (self->proto < 3 && self->fix_imports) {
7091         PyObject *key;
7092         PyObject *item;
7093         PickleState *st = _Pickle_GetGlobalState();
7094 
7095         /* Check if the global (i.e., a function or a class) was renamed
7096            or moved to another module. */
7097         key = PyTuple_Pack(2, module_name, global_name);
7098         if (key == NULL)
7099             return NULL;
7100         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7101         Py_DECREF(key);
7102         if (item) {
7103             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7104                 PyErr_Format(PyExc_RuntimeError,
7105                              "_compat_pickle.NAME_MAPPING values should be "
7106                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7107                 return NULL;
7108             }
7109             module_name = PyTuple_GET_ITEM(item, 0);
7110             global_name = PyTuple_GET_ITEM(item, 1);
7111             if (!PyUnicode_Check(module_name) ||
7112                 !PyUnicode_Check(global_name)) {
7113                 PyErr_Format(PyExc_RuntimeError,
7114                              "_compat_pickle.NAME_MAPPING values should be "
7115                              "pairs of str, not (%.200s, %.200s)",
7116                              Py_TYPE(module_name)->tp_name,
7117                              Py_TYPE(global_name)->tp_name);
7118                 return NULL;
7119             }
7120         }
7121         else if (PyErr_Occurred()) {
7122             return NULL;
7123         }
7124         else {
7125             /* Check if the module was renamed. */
7126             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7127             if (item) {
7128                 if (!PyUnicode_Check(item)) {
7129                     PyErr_Format(PyExc_RuntimeError,
7130                                 "_compat_pickle.IMPORT_MAPPING values should be "
7131                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
7132                     return NULL;
7133                 }
7134                 module_name = item;
7135             }
7136             else if (PyErr_Occurred()) {
7137                 return NULL;
7138             }
7139         }
7140     }
7141 
7142     /*
7143      * we don't use PyImport_GetModule here, because it can return partially-
7144      * initialised modules, which then cause the getattribute to fail.
7145      */
7146     module = PyImport_Import(module_name);
7147     if (module == NULL) {
7148         return NULL;
7149     }
7150     global = getattribute(module, global_name, self->proto >= 4);
7151     Py_DECREF(module);
7152     return global;
7153 }
7154 
7155 /*[clinic input]
7156 
7157 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7158 
7159 Returns size in memory, in bytes.
7160 [clinic start generated code]*/
7161 
7162 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7163 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7164 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7165 {
7166     Py_ssize_t res;
7167 
7168     res = _PyObject_SIZE(Py_TYPE(self));
7169     if (self->memo != NULL)
7170         res += self->memo_size * sizeof(PyObject *);
7171     if (self->marks != NULL)
7172         res += self->marks_size * sizeof(Py_ssize_t);
7173     if (self->input_line != NULL)
7174         res += strlen(self->input_line) + 1;
7175     if (self->encoding != NULL)
7176         res += strlen(self->encoding) + 1;
7177     if (self->errors != NULL)
7178         res += strlen(self->errors) + 1;
7179     return res;
7180 }
7181 
/* Method table for the Unpickler type.  Each entry is a METHODDEF macro
 * (NOTE(review): presumably emitted by Argument Clinic next to the matching
 * *_impl function -- confirm in the generated clinic header); the table is
 * terminated by a NULL sentinel. */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7188 
/* Deallocator for Unpickler objects: untrack the object from the garbage
 * collector, release every owned Python reference, the input buffer view,
 * the memo and the raw C buffers, then free the object itself. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking first so the collector cannot look at the object
       while it is being torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->readinto);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->buffers);
    /* Only release the buffer view if one is actually held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
7213 
7214 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7215 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7216 {
7217     Py_VISIT(self->readline);
7218     Py_VISIT(self->readinto);
7219     Py_VISIT(self->read);
7220     Py_VISIT(self->peek);
7221     Py_VISIT(self->stack);
7222     Py_VISIT(self->pers_func);
7223     Py_VISIT(self->buffers);
7224     return 0;
7225 }
7226 
7227 static int
Unpickler_clear(UnpicklerObject * self)7228 Unpickler_clear(UnpicklerObject *self)
7229 {
7230     Py_CLEAR(self->readline);
7231     Py_CLEAR(self->readinto);
7232     Py_CLEAR(self->read);
7233     Py_CLEAR(self->peek);
7234     Py_CLEAR(self->stack);
7235     Py_CLEAR(self->pers_func);
7236     Py_CLEAR(self->buffers);
7237     if (self->buffer.buf != NULL) {
7238         PyBuffer_Release(&self->buffer);
7239         self->buffer.buf = NULL;
7240     }
7241 
7242     _Unpickler_MemoCleanup(self);
7243     PyMem_Free(self->marks);
7244     self->marks = NULL;
7245     PyMem_Free(self->input_line);
7246     self->input_line = NULL;
7247     PyMem_Free(self->encoding);
7248     self->encoding = NULL;
7249     PyMem_Free(self->errors);
7250     self->errors = NULL;
7251 
7252     return 0;
7253 }
7254 
7255 /*[clinic input]
7256 
7257 _pickle.Unpickler.__init__
7258 
7259   file: object
7260   *
7261   fix_imports: bool = True
7262   encoding: str = 'ASCII'
7263   errors: str = 'strict'
7264   buffers: object(c_default="NULL") = ()
7265 
7266 This takes a binary file for reading a pickle data stream.
7267 
7268 The protocol version of the pickle is detected automatically, so no
7269 protocol argument is needed.  Bytes past the pickled object's
7270 representation are ignored.
7271 
7272 The argument *file* must have two methods, a read() method that takes
7273 an integer argument, and a readline() method that requires no
7274 arguments.  Both methods should return bytes.  Thus *file* can be a
7275 binary file object opened for reading, an io.BytesIO object, or any
7276 other custom object that meets this interface.
7277 
7278 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7279 which are used to control compatibility support for pickle stream
7280 generated by Python 2.  If *fix_imports* is True, pickle will try to
7281 map the old Python 2 names to the new names used in Python 3.  The
7282 *encoding* and *errors* tell pickle how to decode 8-bit string
7283 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7284 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7285 string instances as bytes objects.
7286 [clinic start generated code]*/
7287 
7288 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7289 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7290                                 int fix_imports, const char *encoding,
7291                                 const char *errors, PyObject *buffers)
7292 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7293 {
7294     _Py_IDENTIFIER(persistent_load);
7295 
7296     /* In case of multiple __init__() calls, clear previous content. */
7297     if (self->read != NULL)
7298         (void)Unpickler_clear(self);
7299 
7300     if (_Unpickler_SetInputStream(self, file) < 0)
7301         return -1;
7302 
7303     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7304         return -1;
7305 
7306     if (_Unpickler_SetBuffers(self, buffers) < 0)
7307         return -1;
7308 
7309     self->fix_imports = fix_imports;
7310 
7311     if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7312                         &self->pers_func, &self->pers_func_self) < 0)
7313     {
7314         return -1;
7315     }
7316 
7317     self->stack = (Pdata *)Pdata_New();
7318     if (self->stack == NULL)
7319         return -1;
7320 
7321     self->memo_size = 32;
7322     self->memo = _Unpickler_NewMemo(self->memo_size);
7323     if (self->memo == NULL)
7324         return -1;
7325 
7326     self->proto = 0;
7327 
7328     return 0;
7329 }
7330 
7331 
7332 /* Define a proxy object for the Unpickler's internal memo object. This is to
7333  * avoid breaking code like:
7334  *  unpickler.memo.clear()
7335  * and
7336  *  unpickler.memo = saved_memo
7337  * Is this a good idea? Not really, but we don't want to break code that uses
7338  * it. Note that we don't implement the entire mapping API here. This is
7339  * intentional, as these should be treated as black-box implementation details.
7340  *
7341  * We do, however, have to implement pickling/unpickling support because of
7342  * real-world code like cvs2svn.
7343  */
7344 
7345 /*[clinic input]
7346 _pickle.UnpicklerMemoProxy.clear
7347 
7348 Remove all items from memo.
7349 [clinic start generated code]*/
7350 
7351 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7352 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7353 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7354 {
7355     _Unpickler_MemoCleanup(self->unpickler);
7356     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7357     if (self->unpickler->memo == NULL)
7358         return NULL;
7359     Py_RETURN_NONE;
7360 }
7361 
7362 /*[clinic input]
7363 _pickle.UnpicklerMemoProxy.copy
7364 
7365 Copy the memo to a new object.
7366 [clinic start generated code]*/
7367 
7368 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7369 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7370 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7371 {
7372     size_t i;
7373     PyObject *new_memo = PyDict_New();
7374     if (new_memo == NULL)
7375         return NULL;
7376 
7377     for (i = 0; i < self->unpickler->memo_size; i++) {
7378         int status;
7379         PyObject *key, *value;
7380 
7381         value = self->unpickler->memo[i];
7382         if (value == NULL)
7383             continue;
7384 
7385         key = PyLong_FromSsize_t(i);
7386         if (key == NULL)
7387             goto error;
7388         status = PyDict_SetItem(new_memo, key, value);
7389         Py_DECREF(key);
7390         if (status < 0)
7391             goto error;
7392     }
7393     return new_memo;
7394 
7395 error:
7396     Py_DECREF(new_memo);
7397     return NULL;
7398 }
7399 
7400 /*[clinic input]
7401 _pickle.UnpicklerMemoProxy.__reduce__
7402 
7403 Implement pickling support.
7404 [clinic start generated code]*/
7405 
7406 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7407 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7408 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7409 {
7410     PyObject *reduce_value;
7411     PyObject *constructor_args;
7412     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7413     if (contents == NULL)
7414         return NULL;
7415 
7416     reduce_value = PyTuple_New(2);
7417     if (reduce_value == NULL) {
7418         Py_DECREF(contents);
7419         return NULL;
7420     }
7421     constructor_args = PyTuple_New(1);
7422     if (constructor_args == NULL) {
7423         Py_DECREF(contents);
7424         Py_DECREF(reduce_value);
7425         return NULL;
7426     }
7427     PyTuple_SET_ITEM(constructor_args, 0, contents);
7428     Py_INCREF((PyObject *)&PyDict_Type);
7429     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7430     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7431     return reduce_value;
7432 }
7433 
7434 static PyMethodDef unpicklerproxy_methods[] = {
7435     _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7436     _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7437     _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7438     {NULL, NULL}    /* sentinel */
7439 };
7440 
7441 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7442 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7443 {
7444     PyObject_GC_UnTrack(self);
7445     Py_XDECREF(self->unpickler);
7446     PyObject_GC_Del((PyObject *)self);
7447 }
7448 
7449 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7450 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7451                             visitproc visit, void *arg)
7452 {
7453     Py_VISIT(self->unpickler);
7454     return 0;
7455 }
7456 
7457 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7458 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7459 {
7460     Py_CLEAR(self->unpickler);
7461     return 0;
7462 }
7463 
7464 static PyTypeObject UnpicklerMemoProxyType = {
7465     PyVarObject_HEAD_INIT(NULL, 0)
7466     "_pickle.UnpicklerMemoProxy",               /*tp_name*/
7467     sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
7468     0,
7469     (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
7470     0,                                          /* tp_vectorcall_offset */
7471     0,                                          /* tp_getattr */
7472     0,                                          /* tp_setattr */
7473     0,                                          /* tp_as_async */
7474     0,                                          /* tp_repr */
7475     0,                                          /* tp_as_number */
7476     0,                                          /* tp_as_sequence */
7477     0,                                          /* tp_as_mapping */
7478     PyObject_HashNotImplemented,                /* tp_hash */
7479     0,                                          /* tp_call */
7480     0,                                          /* tp_str */
7481     PyObject_GenericGetAttr,                    /* tp_getattro */
7482     PyObject_GenericSetAttr,                    /* tp_setattro */
7483     0,                                          /* tp_as_buffer */
7484     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7485     0,                                          /* tp_doc */
7486     (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
7487     (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
7488     0,                                          /* tp_richcompare */
7489     0,                                          /* tp_weaklistoffset */
7490     0,                                          /* tp_iter */
7491     0,                                          /* tp_iternext */
7492     unpicklerproxy_methods,                     /* tp_methods */
7493 };
7494 
7495 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7496 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7497 {
7498     UnpicklerMemoProxyObject *self;
7499 
7500     self = PyObject_GC_New(UnpicklerMemoProxyObject,
7501                            &UnpicklerMemoProxyType);
7502     if (self == NULL)
7503         return NULL;
7504     Py_INCREF(unpickler);
7505     self->unpickler = unpickler;
7506     PyObject_GC_Track(self);
7507     return (PyObject *)self;
7508 }
7509 
7510 /*****************************************************************************/
7511 
7512 
7513 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7514 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7515 {
7516     return UnpicklerMemoProxy_New(self);
7517 }
7518 
7519 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7520 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7521 {
7522     PyObject **new_memo;
7523     size_t new_memo_size = 0;
7524 
7525     if (obj == NULL) {
7526         PyErr_SetString(PyExc_TypeError,
7527                         "attribute deletion is not supported");
7528         return -1;
7529     }
7530 
7531     if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7532         UnpicklerObject *unpickler =
7533             ((UnpicklerMemoProxyObject *)obj)->unpickler;
7534 
7535         new_memo_size = unpickler->memo_size;
7536         new_memo = _Unpickler_NewMemo(new_memo_size);
7537         if (new_memo == NULL)
7538             return -1;
7539 
7540         for (size_t i = 0; i < new_memo_size; i++) {
7541             Py_XINCREF(unpickler->memo[i]);
7542             new_memo[i] = unpickler->memo[i];
7543         }
7544     }
7545     else if (PyDict_Check(obj)) {
7546         Py_ssize_t i = 0;
7547         PyObject *key, *value;
7548 
7549         new_memo_size = PyDict_GET_SIZE(obj);
7550         new_memo = _Unpickler_NewMemo(new_memo_size);
7551         if (new_memo == NULL)
7552             return -1;
7553 
7554         while (PyDict_Next(obj, &i, &key, &value)) {
7555             Py_ssize_t idx;
7556             if (!PyLong_Check(key)) {
7557                 PyErr_SetString(PyExc_TypeError,
7558                                 "memo key must be integers");
7559                 goto error;
7560             }
7561             idx = PyLong_AsSsize_t(key);
7562             if (idx == -1 && PyErr_Occurred())
7563                 goto error;
7564             if (idx < 0) {
7565                 PyErr_SetString(PyExc_ValueError,
7566                                 "memo key must be positive integers.");
7567                 goto error;
7568             }
7569             if (_Unpickler_MemoPut(self, idx, value) < 0)
7570                 goto error;
7571         }
7572     }
7573     else {
7574         PyErr_Format(PyExc_TypeError,
7575                      "'memo' attribute must be an UnpicklerMemoProxy object "
7576                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7577         return -1;
7578     }
7579 
7580     _Unpickler_MemoCleanup(self);
7581     self->memo_size = new_memo_size;
7582     self->memo = new_memo;
7583 
7584     return 0;
7585 
7586   error:
7587     if (new_memo_size) {
7588         for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7589             Py_XDECREF(new_memo[i]);
7590         }
7591         PyMem_FREE(new_memo);
7592     }
7593     return -1;
7594 }
7595 
7596 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7597 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7598 {
7599     if (self->pers_func == NULL) {
7600         PyErr_SetString(PyExc_AttributeError, "persistent_load");
7601         return NULL;
7602     }
7603     return reconstruct_method(self->pers_func, self->pers_func_self);
7604 }
7605 
7606 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7607 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7608 {
7609     if (value == NULL) {
7610         PyErr_SetString(PyExc_TypeError,
7611                         "attribute deletion is not supported");
7612         return -1;
7613     }
7614     if (!PyCallable_Check(value)) {
7615         PyErr_SetString(PyExc_TypeError,
7616                         "persistent_load must be a callable taking "
7617                         "one argument");
7618         return -1;
7619     }
7620 
7621     self->pers_func_self = NULL;
7622     Py_INCREF(value);
7623     Py_XSETREF(self->pers_func, value);
7624 
7625     return 0;
7626 }
7627 
7628 static PyGetSetDef Unpickler_getsets[] = {
7629     {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7630     {"persistent_load", (getter)Unpickler_get_persload,
7631                         (setter)Unpickler_set_persload},
7632     {NULL}
7633 };
7634 
7635 static PyTypeObject Unpickler_Type = {
7636     PyVarObject_HEAD_INIT(NULL, 0)
7637     "_pickle.Unpickler",                /*tp_name*/
7638     sizeof(UnpicklerObject),            /*tp_basicsize*/
7639     0,                                  /*tp_itemsize*/
7640     (destructor)Unpickler_dealloc,      /*tp_dealloc*/
7641     0,                                  /*tp_vectorcall_offset*/
7642     0,                                  /*tp_getattr*/
7643     0,                                  /*tp_setattr*/
7644     0,                                  /*tp_as_async*/
7645     0,                                  /*tp_repr*/
7646     0,                                  /*tp_as_number*/
7647     0,                                  /*tp_as_sequence*/
7648     0,                                  /*tp_as_mapping*/
7649     0,                                  /*tp_hash*/
7650     0,                                  /*tp_call*/
7651     0,                                  /*tp_str*/
7652     0,                                  /*tp_getattro*/
7653     0,                                  /*tp_setattro*/
7654     0,                                  /*tp_as_buffer*/
7655     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7656     _pickle_Unpickler___init____doc__,  /*tp_doc*/
7657     (traverseproc)Unpickler_traverse,   /*tp_traverse*/
7658     (inquiry)Unpickler_clear,           /*tp_clear*/
7659     0,                                  /*tp_richcompare*/
7660     0,                                  /*tp_weaklistoffset*/
7661     0,                                  /*tp_iter*/
7662     0,                                  /*tp_iternext*/
7663     Unpickler_methods,                  /*tp_methods*/
7664     0,                                  /*tp_members*/
7665     Unpickler_getsets,                  /*tp_getset*/
7666     0,                                  /*tp_base*/
7667     0,                                  /*tp_dict*/
7668     0,                                  /*tp_descr_get*/
7669     0,                                  /*tp_descr_set*/
7670     0,                                  /*tp_dictoffset*/
7671     _pickle_Unpickler___init__,         /*tp_init*/
7672     PyType_GenericAlloc,                /*tp_alloc*/
7673     PyType_GenericNew,                  /*tp_new*/
7674     PyObject_GC_Del,                    /*tp_free*/
7675     0,                                  /*tp_is_gc*/
7676 };
7677 
7678 /*[clinic input]
7679 
7680 _pickle.dump
7681 
7682   obj: object
7683   file: object
7684   protocol: object = None
7685   *
7686   fix_imports: bool = True
7687   buffer_callback: object = None
7688 
7689 Write a pickled representation of obj to the open file object file.
7690 
7691 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7692 be more efficient.
7693 
7694 The optional *protocol* argument tells the pickler to use the given
7695 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7696 protocol is 4. It was introduced in Python 3.4, and is incompatible
7697 with previous versions.
7698 
7699 Specifying a negative protocol version selects the highest protocol
7700 version supported.  The higher the protocol used, the more recent the
7701 version of Python needed to read the pickle produced.
7702 
7703 The *file* argument must have a write() method that accepts a single
7704 bytes argument.  It can thus be a file object opened for binary
7705 writing, an io.BytesIO instance, or any other custom object that meets
7706 this interface.
7707 
7708 If *fix_imports* is True and protocol is less than 3, pickle will try
7709 to map the new Python 3 names to the old module names used in Python
7710 2, so that the pickle data stream is readable with Python 2.
7711 
7712 If *buffer_callback* is None (the default), buffer views are serialized
7713 into *file* as part of the pickle stream.  It is an error if
7714 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7715 
7716 [clinic start generated code]*/
7717 
7718 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7719 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7720                   PyObject *protocol, int fix_imports,
7721                   PyObject *buffer_callback)
7722 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7723 {
7724     PicklerObject *pickler = _Pickler_New();
7725 
7726     if (pickler == NULL)
7727         return NULL;
7728 
7729     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7730         goto error;
7731 
7732     if (_Pickler_SetOutputStream(pickler, file) < 0)
7733         goto error;
7734 
7735     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7736         goto error;
7737 
7738     if (dump(pickler, obj) < 0)
7739         goto error;
7740 
7741     if (_Pickler_FlushToFile(pickler) < 0)
7742         goto error;
7743 
7744     Py_DECREF(pickler);
7745     Py_RETURN_NONE;
7746 
7747   error:
7748     Py_XDECREF(pickler);
7749     return NULL;
7750 }
7751 
7752 /*[clinic input]
7753 
7754 _pickle.dumps
7755 
7756   obj: object
7757   protocol: object = None
7758   *
7759   fix_imports: bool = True
7760   buffer_callback: object = None
7761 
7762 Return the pickled representation of the object as a bytes object.
7763 
7764 The optional *protocol* argument tells the pickler to use the given
7765 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7766 protocol is 4. It was introduced in Python 3.4, and is incompatible
7767 with previous versions.
7768 
7769 Specifying a negative protocol version selects the highest protocol
7770 version supported.  The higher the protocol used, the more recent the
7771 version of Python needed to read the pickle produced.
7772 
7773 If *fix_imports* is True and *protocol* is less than 3, pickle will
7774 try to map the new Python 3 names to the old module names used in
7775 Python 2, so that the pickle data stream is readable with Python 2.
7776 
7777 If *buffer_callback* is None (the default), buffer views are serialized
7778 into *file* as part of the pickle stream.  It is an error if
7779 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7780 
7781 [clinic start generated code]*/
7782 
7783 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7784 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7785                    int fix_imports, PyObject *buffer_callback)
7786 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7787 {
7788     PyObject *result;
7789     PicklerObject *pickler = _Pickler_New();
7790 
7791     if (pickler == NULL)
7792         return NULL;
7793 
7794     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7795         goto error;
7796 
7797     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7798         goto error;
7799 
7800     if (dump(pickler, obj) < 0)
7801         goto error;
7802 
7803     result = _Pickler_GetString(pickler);
7804     Py_DECREF(pickler);
7805     return result;
7806 
7807   error:
7808     Py_XDECREF(pickler);
7809     return NULL;
7810 }
7811 
7812 /*[clinic input]
7813 
7814 _pickle.load
7815 
7816   file: object
7817   *
7818   fix_imports: bool = True
7819   encoding: str = 'ASCII'
7820   errors: str = 'strict'
7821   buffers: object(c_default="NULL") = ()
7822 
7823 Read and return an object from the pickle data stored in a file.
7824 
7825 This is equivalent to ``Unpickler(file).load()``, but may be more
7826 efficient.
7827 
7828 The protocol version of the pickle is detected automatically, so no
7829 protocol argument is needed.  Bytes past the pickled object's
7830 representation are ignored.
7831 
7832 The argument *file* must have two methods, a read() method that takes
7833 an integer argument, and a readline() method that requires no
7834 arguments.  Both methods should return bytes.  Thus *file* can be a
7835 binary file object opened for reading, an io.BytesIO object, or any
7836 other custom object that meets this interface.
7837 
7838 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7839 which are used to control compatibility support for pickle stream
7840 generated by Python 2.  If *fix_imports* is True, pickle will try to
7841 map the old Python 2 names to the new names used in Python 3.  The
7842 *encoding* and *errors* tell pickle how to decode 8-bit string
7843 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7844 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7845 string instances as bytes objects.
7846 [clinic start generated code]*/
7847 
7848 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7849 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7850                   const char *encoding, const char *errors,
7851                   PyObject *buffers)
7852 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7853 {
7854     PyObject *result;
7855     UnpicklerObject *unpickler = _Unpickler_New();
7856 
7857     if (unpickler == NULL)
7858         return NULL;
7859 
7860     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7861         goto error;
7862 
7863     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7864         goto error;
7865 
7866     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7867         goto error;
7868 
7869     unpickler->fix_imports = fix_imports;
7870 
7871     result = load(unpickler);
7872     Py_DECREF(unpickler);
7873     return result;
7874 
7875   error:
7876     Py_XDECREF(unpickler);
7877     return NULL;
7878 }
7879 
7880 /*[clinic input]
7881 
7882 _pickle.loads
7883 
7884   data: object
7885   *
7886   fix_imports: bool = True
7887   encoding: str = 'ASCII'
7888   errors: str = 'strict'
7889   buffers: object(c_default="NULL") = ()
7890 
7891 Read and return an object from the given pickle data.
7892 
7893 The protocol version of the pickle is detected automatically, so no
7894 protocol argument is needed.  Bytes past the pickled object's
7895 representation are ignored.
7896 
7897 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7898 which are used to control compatibility support for pickle stream
7899 generated by Python 2.  If *fix_imports* is True, pickle will try to
7900 map the old Python 2 names to the new names used in Python 3.  The
7901 *encoding* and *errors* tell pickle how to decode 8-bit string
7902 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7903 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7904 string instances as bytes objects.
7905 [clinic start generated code]*/
7906 
7907 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7908 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7909                    const char *encoding, const char *errors,
7910                    PyObject *buffers)
7911 /*[clinic end generated code: output=82ac1e6b588e6d02 input=9c2ab6a0960185ea]*/
7912 {
7913     PyObject *result;
7914     UnpicklerObject *unpickler = _Unpickler_New();
7915 
7916     if (unpickler == NULL)
7917         return NULL;
7918 
7919     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7920         goto error;
7921 
7922     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7923         goto error;
7924 
7925     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7926         goto error;
7927 
7928     unpickler->fix_imports = fix_imports;
7929 
7930     result = load(unpickler);
7931     Py_DECREF(unpickler);
7932     return result;
7933 
7934   error:
7935     Py_XDECREF(unpickler);
7936     return NULL;
7937 }
7938 
7939 static struct PyMethodDef pickle_methods[] = {
7940     _PICKLE_DUMP_METHODDEF
7941     _PICKLE_DUMPS_METHODDEF
7942     _PICKLE_LOAD_METHODDEF
7943     _PICKLE_LOADS_METHODDEF
7944     {NULL, NULL} /* sentinel */
7945 };
7946 
7947 static int
pickle_clear(PyObject * m)7948 pickle_clear(PyObject *m)
7949 {
7950     _Pickle_ClearState(_Pickle_GetState(m));
7951     return 0;
7952 }
7953 
7954 static void
pickle_free(PyObject * m)7955 pickle_free(PyObject *m)
7956 {
7957     _Pickle_ClearState(_Pickle_GetState(m));
7958 }
7959 
7960 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7961 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7962 {
7963     PickleState *st = _Pickle_GetState(m);
7964     Py_VISIT(st->PickleError);
7965     Py_VISIT(st->PicklingError);
7966     Py_VISIT(st->UnpicklingError);
7967     Py_VISIT(st->dispatch_table);
7968     Py_VISIT(st->extension_registry);
7969     Py_VISIT(st->extension_cache);
7970     Py_VISIT(st->inverted_registry);
7971     Py_VISIT(st->name_mapping_2to3);
7972     Py_VISIT(st->import_mapping_2to3);
7973     Py_VISIT(st->name_mapping_3to2);
7974     Py_VISIT(st->import_mapping_3to2);
7975     Py_VISIT(st->codecs_encode);
7976     Py_VISIT(st->getattr);
7977     Py_VISIT(st->partial);
7978     return 0;
7979 }
7980 
7981 static struct PyModuleDef _picklemodule = {
7982     PyModuleDef_HEAD_INIT,
7983     "_pickle",            /* m_name */
7984     pickle_module_doc,    /* m_doc */
7985     sizeof(PickleState),  /* m_size */
7986     pickle_methods,       /* m_methods */
7987     NULL,                 /* m_reload */
7988     pickle_traverse,      /* m_traverse */
7989     pickle_clear,         /* m_clear */
7990     (freefunc)pickle_free /* m_free */
7991 };
7992 
7993 PyMODINIT_FUNC
PyInit__pickle(void)7994 PyInit__pickle(void)
7995 {
7996     PyObject *m;
7997     PickleState *st;
7998 
7999     m = PyState_FindModule(&_picklemodule);
8000     if (m) {
8001         Py_INCREF(m);
8002         return m;
8003     }
8004 
8005     if (PyType_Ready(&Unpickler_Type) < 0)
8006         return NULL;
8007     if (PyType_Ready(&Pickler_Type) < 0)
8008         return NULL;
8009     if (PyType_Ready(&Pdata_Type) < 0)
8010         return NULL;
8011     if (PyType_Ready(&PicklerMemoProxyType) < 0)
8012         return NULL;
8013     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
8014         return NULL;
8015 
8016     /* Create the module and add the functions. */
8017     m = PyModule_Create(&_picklemodule);
8018     if (m == NULL)
8019         return NULL;
8020 
8021     /* Add types */
8022     Py_INCREF(&Pickler_Type);
8023     if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
8024         return NULL;
8025     Py_INCREF(&Unpickler_Type);
8026     if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
8027         return NULL;
8028     Py_INCREF(&PyPickleBuffer_Type);
8029     if (PyModule_AddObject(m, "PickleBuffer",
8030                            (PyObject *)&PyPickleBuffer_Type) < 0)
8031         return NULL;
8032 
8033     st = _Pickle_GetState(m);
8034 
8035     /* Initialize the exceptions. */
8036     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
8037     if (st->PickleError == NULL)
8038         return NULL;
8039     st->PicklingError = \
8040         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
8041     if (st->PicklingError == NULL)
8042         return NULL;
8043     st->UnpicklingError = \
8044         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
8045     if (st->UnpicklingError == NULL)
8046         return NULL;
8047 
8048     Py_INCREF(st->PickleError);
8049     if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
8050         return NULL;
8051     Py_INCREF(st->PicklingError);
8052     if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
8053         return NULL;
8054     Py_INCREF(st->UnpicklingError);
8055     if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
8056         return NULL;
8057 
8058     if (_Pickle_InitState(st) < 0)
8059         return NULL;
8060 
8061     return m;
8062 }
8063