1
2 /* Core extension modules are built-in on some platforms (e.g. Windows). */
3 #ifdef Py_BUILD_CORE
4 #define Py_BUILD_CORE_BUILTIN
5 #undef Py_BUILD_CORE
6 #endif
7
8 #include "Python.h"
9 #include "structmember.h"
10
11 PyDoc_STRVAR(pickle_module_doc,
12 "Optimized C implementation for the Python pickle module.");
13
14 /*[clinic input]
15 module _pickle
16 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
17 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
18 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
19 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
20 [clinic start generated code]*/
21 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
22
/* Protocol version limits.  HIGHEST_PROTOCOL has to be raised whenever new
   opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 4,
    DEFAULT_PROTOCOL = 3
};
28
/* One-byte opcodes of the pickle stream.  This list has to stay in step with
   pickle.py; pickletools.py carries the detailed documentation for each
   opcode. */
enum opcode {
    /* Opcodes listed ahead of the protocol 2 additions. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Added by protocol 2. */
    PROTO    = '\x80',
    NEWOBJ   = '\x81',
    EXT1     = '\x82',
    EXT2     = '\x83',
    EXT4     = '\x84',
    TUPLE1   = '\x85',
    TUPLE2   = '\x86',
    TUPLE3   = '\x87',
    NEWTRUE  = '\x88',
    NEWFALSE = '\x89',
    LONG1    = '\x8a',
    LONG4    = '\x8b',

    /* Added by protocol 3 (Python 3.x). */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Added by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95'
};
104
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Mirrors pickle.Pickler._BATCHSIZE; nothing breaks
       if the two values drift apart, but there is no known benefit in
       letting them do so either. */
    BATCHSIZE = 1000,

    /* Nesting depth after which a Pickler running in "fast mode" begins
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size used while unpickling (disabled on unpeekable
       streams). */
    PREFETCH = 8192 * 16,

    /* Frame bookkeeping: a committed frame whose payload is shorter than
       FRAME_SIZE_MIN has its header dropped, a frame is committed once its
       payload reaches FRAME_SIZE_TARGET, and FRAME_HEADER_SIZE bytes are
       reserved for each frame header. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
126
127 /*************************************************************************/
128
129 /* State of the pickle module, per PEP 3121. */
130 typedef struct {
131 /* Exception classes for pickle. */
132 PyObject *PickleError;
133 PyObject *PicklingError;
134 PyObject *UnpicklingError;
135
136 /* copyreg.dispatch_table, {type_object: pickling_function} */
137 PyObject *dispatch_table;
138
139 /* For the extension opcodes EXT1, EXT2 and EXT4. */
140
141 /* copyreg._extension_registry, {(module_name, function_name): code} */
142 PyObject *extension_registry;
143 /* copyreg._extension_cache, {code: object} */
144 PyObject *extension_cache;
145 /* copyreg._inverted_registry, {code: (module_name, function_name)} */
146 PyObject *inverted_registry;
147
148 /* Import mappings for compatibility with Python 2.x */
149
150 /* _compat_pickle.NAME_MAPPING,
151 {(oldmodule, oldname): (newmodule, newname)} */
152 PyObject *name_mapping_2to3;
153 /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
154 PyObject *import_mapping_2to3;
155 /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
156 PyObject *name_mapping_3to2;
157 PyObject *import_mapping_3to2;
158
159 /* codecs.encode, used for saving bytes in older protocols */
160 PyObject *codecs_encode;
161 /* builtins.getattr, used for saving nested names with protocol < 4 */
162 PyObject *getattr;
163 /* functools.partial, used for implementing __newobj_ex__ with protocols
164 2 and 3 */
165 PyObject *partial;
166 } PickleState;
167
168 /* Forward declaration of the _pickle module definition. */
169 static struct PyModuleDef _picklemodule;
170
171 /* Given a module object, get its per-module state. */
172 static PickleState *
_Pickle_GetState(PyObject * module)173 _Pickle_GetState(PyObject *module)
174 {
175 return (PickleState *)PyModule_GetState(module);
176 }
177
178 /* Find the module instance imported in the currently running sub-interpreter
179 and get its state. */
180 static PickleState *
_Pickle_GetGlobalState(void)181 _Pickle_GetGlobalState(void)
182 {
183 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
184 }
185
186 /* Clear the given pickle module state. */
187 static void
_Pickle_ClearState(PickleState * st)188 _Pickle_ClearState(PickleState *st)
189 {
190 Py_CLEAR(st->PickleError);
191 Py_CLEAR(st->PicklingError);
192 Py_CLEAR(st->UnpicklingError);
193 Py_CLEAR(st->dispatch_table);
194 Py_CLEAR(st->extension_registry);
195 Py_CLEAR(st->extension_cache);
196 Py_CLEAR(st->inverted_registry);
197 Py_CLEAR(st->name_mapping_2to3);
198 Py_CLEAR(st->import_mapping_2to3);
199 Py_CLEAR(st->name_mapping_3to2);
200 Py_CLEAR(st->import_mapping_3to2);
201 Py_CLEAR(st->codecs_encode);
202 Py_CLEAR(st->getattr);
203 Py_CLEAR(st->partial);
204 }
205
206 /* Initialize the given pickle module state. */
207 static int
_Pickle_InitState(PickleState * st)208 _Pickle_InitState(PickleState *st)
209 {
210 PyObject *copyreg = NULL;
211 PyObject *compat_pickle = NULL;
212 PyObject *codecs = NULL;
213 PyObject *functools = NULL;
214 _Py_IDENTIFIER(getattr);
215
216 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
217 if (st->getattr == NULL)
218 goto error;
219
220 copyreg = PyImport_ImportModule("copyreg");
221 if (!copyreg)
222 goto error;
223 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
224 if (!st->dispatch_table)
225 goto error;
226 if (!PyDict_CheckExact(st->dispatch_table)) {
227 PyErr_Format(PyExc_RuntimeError,
228 "copyreg.dispatch_table should be a dict, not %.200s",
229 Py_TYPE(st->dispatch_table)->tp_name);
230 goto error;
231 }
232 st->extension_registry = \
233 PyObject_GetAttrString(copyreg, "_extension_registry");
234 if (!st->extension_registry)
235 goto error;
236 if (!PyDict_CheckExact(st->extension_registry)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg._extension_registry should be a dict, "
239 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
240 goto error;
241 }
242 st->inverted_registry = \
243 PyObject_GetAttrString(copyreg, "_inverted_registry");
244 if (!st->inverted_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->inverted_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._inverted_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
250 goto error;
251 }
252 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
253 if (!st->extension_cache)
254 goto error;
255 if (!PyDict_CheckExact(st->extension_cache)) {
256 PyErr_Format(PyExc_RuntimeError,
257 "copyreg._extension_cache should be a dict, "
258 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
259 goto error;
260 }
261 Py_CLEAR(copyreg);
262
263 /* Load the 2.x -> 3.x stdlib module mapping tables */
264 compat_pickle = PyImport_ImportModule("_compat_pickle");
265 if (!compat_pickle)
266 goto error;
267 st->name_mapping_2to3 = \
268 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
269 if (!st->name_mapping_2to3)
270 goto error;
271 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
272 PyErr_Format(PyExc_RuntimeError,
273 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
274 Py_TYPE(st->name_mapping_2to3)->tp_name);
275 goto error;
276 }
277 st->import_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
279 if (!st->import_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.IMPORT_MAPPING should be a dict, "
284 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
285 goto error;
286 }
287 /* ... and the 3.x -> 2.x mapping tables */
288 st->name_mapping_3to2 = \
289 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
290 if (!st->name_mapping_3to2)
291 goto error;
292 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
293 PyErr_Format(PyExc_RuntimeError,
294 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
295 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
296 goto error;
297 }
298 st->import_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
300 if (!st->import_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
306 goto error;
307 }
308 Py_CLEAR(compat_pickle);
309
310 codecs = PyImport_ImportModule("codecs");
311 if (codecs == NULL)
312 goto error;
313 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
314 if (st->codecs_encode == NULL) {
315 goto error;
316 }
317 if (!PyCallable_Check(st->codecs_encode)) {
318 PyErr_Format(PyExc_RuntimeError,
319 "codecs.encode should be a callable, not %.200s",
320 Py_TYPE(st->codecs_encode)->tp_name);
321 goto error;
322 }
323 Py_CLEAR(codecs);
324
325 functools = PyImport_ImportModule("functools");
326 if (!functools)
327 goto error;
328 st->partial = PyObject_GetAttrString(functools, "partial");
329 if (!st->partial)
330 goto error;
331 Py_CLEAR(functools);
332
333 return 0;
334
335 error:
336 Py_CLEAR(copyreg);
337 Py_CLEAR(compat_pickle);
338 Py_CLEAR(codecs);
339 Py_CLEAR(functools);
340 _Pickle_ClearState(st);
341 return -1;
342 }
343
344 /* Helper for calling a function with a single argument quickly.
345
346 This function steals the reference of the given argument. */
347 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)348 _Pickle_FastCall(PyObject *func, PyObject *obj)
349 {
350 PyObject *result;
351
352 result = PyObject_CallFunctionObjArgs(func, obj, NULL);
353 Py_DECREF(obj);
354 return result;
355 }
356
357 /*************************************************************************/
358
359 /* Retrieve and deconstruct a method for avoiding a reference cycle
360 (pickler -> bound method of pickler -> pickler) */
361 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)362 init_method_ref(PyObject *self, _Py_Identifier *name,
363 PyObject **method_func, PyObject **method_self)
364 {
365 PyObject *func, *func2;
366 int ret;
367
368 /* *method_func and *method_self should be consistent. All refcount decrements
369 should be occurred after setting *method_self and *method_func. */
370 ret = _PyObject_LookupAttrId(self, name, &func);
371 if (func == NULL) {
372 *method_self = NULL;
373 Py_CLEAR(*method_func);
374 return ret;
375 }
376
377 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
378 /* Deconstruct a bound Python method */
379 func2 = PyMethod_GET_FUNCTION(func);
380 Py_INCREF(func2);
381 *method_self = self; /* borrowed */
382 Py_XSETREF(*method_func, func2);
383 Py_DECREF(func);
384 return 0;
385 }
386 else {
387 *method_self = NULL;
388 Py_XSETREF(*method_func, func);
389 return 0;
390 }
391 }
392
393 /* Bind a method if it was deconstructed */
394 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)395 reconstruct_method(PyObject *func, PyObject *self)
396 {
397 if (self) {
398 return PyMethod_New(func, self);
399 }
400 else {
401 Py_INCREF(func);
402 return func;
403 }
404 }
405
406 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)407 call_method(PyObject *func, PyObject *self, PyObject *obj)
408 {
409 if (self) {
410 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
411 }
412 else {
413 return PyObject_CallFunctionObjArgs(func, obj, NULL);
414 }
415 }
416
417 /*************************************************************************/
418
419 /* Internal data type used as the unpickling stack. */
420 typedef struct {
421 PyObject_VAR_HEAD
422 PyObject **data;
423 int mark_set; /* is MARK set? */
424 Py_ssize_t fence; /* position of top MARK or 0 */
425 Py_ssize_t allocated; /* number of slots in data allocated */
426 } Pdata;
427
428 static void
Pdata_dealloc(Pdata * self)429 Pdata_dealloc(Pdata *self)
430 {
431 Py_ssize_t i = Py_SIZE(self);
432 while (--i >= 0) {
433 Py_DECREF(self->data[i]);
434 }
435 PyMem_FREE(self->data);
436 PyObject_Del(self);
437 }
438
439 static PyTypeObject Pdata_Type = {
440 PyVarObject_HEAD_INIT(NULL, 0)
441 "_pickle.Pdata", /*tp_name*/
442 sizeof(Pdata), /*tp_basicsize*/
443 sizeof(PyObject *), /*tp_itemsize*/
444 (destructor)Pdata_dealloc, /*tp_dealloc*/
445 };
446
447 static PyObject *
Pdata_New(void)448 Pdata_New(void)
449 {
450 Pdata *self;
451
452 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
453 return NULL;
454 Py_SIZE(self) = 0;
455 self->mark_set = 0;
456 self->fence = 0;
457 self->allocated = 8;
458 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
459 if (self->data)
460 return (PyObject *)self;
461 Py_DECREF(self);
462 return PyErr_NoMemory();
463 }
464
465
466 /* Retain only the initial clearto items. If clearto >= the current
467 * number of items, this is a (non-erroneous) NOP.
468 */
469 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)470 Pdata_clear(Pdata *self, Py_ssize_t clearto)
471 {
472 Py_ssize_t i = Py_SIZE(self);
473
474 assert(clearto >= self->fence);
475 if (clearto >= i)
476 return 0;
477
478 while (--i >= clearto) {
479 Py_CLEAR(self->data[i]);
480 }
481 Py_SIZE(self) = clearto;
482 return 0;
483 }
484
485 static int
Pdata_grow(Pdata * self)486 Pdata_grow(Pdata *self)
487 {
488 PyObject **data = self->data;
489 size_t allocated = (size_t)self->allocated;
490 size_t new_allocated;
491
492 new_allocated = (allocated >> 3) + 6;
493 /* check for integer overflow */
494 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
495 goto nomemory;
496 new_allocated += allocated;
497 PyMem_RESIZE(data, PyObject *, new_allocated);
498 if (data == NULL)
499 goto nomemory;
500
501 self->data = data;
502 self->allocated = (Py_ssize_t)new_allocated;
503 return 0;
504
505 nomemory:
506 PyErr_NoMemory();
507 return -1;
508 }
509
510 static int
Pdata_stack_underflow(Pdata * self)511 Pdata_stack_underflow(Pdata *self)
512 {
513 PickleState *st = _Pickle_GetGlobalState();
514 PyErr_SetString(st->UnpicklingError,
515 self->mark_set ?
516 "unexpected MARK found" :
517 "unpickling stack underflow");
518 return -1;
519 }
520
521 /* D is a Pdata*. Pop the topmost element and store it into V, which
522 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
523 * is raised and V is set to NULL.
524 */
525 static PyObject *
Pdata_pop(Pdata * self)526 Pdata_pop(Pdata *self)
527 {
528 if (Py_SIZE(self) <= self->fence) {
529 Pdata_stack_underflow(self);
530 return NULL;
531 }
532 return self->data[--Py_SIZE(self)];
533 }
534 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
535
536 static int
Pdata_push(Pdata * self,PyObject * obj)537 Pdata_push(Pdata *self, PyObject *obj)
538 {
539 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
540 return -1;
541 }
542 self->data[Py_SIZE(self)++] = obj;
543 return 0;
544 }
545
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the *enclosing function* returns ER; the object is not
   released by this macro in that case. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   O is evaluated exactly once.  If the push fails, the reference taken here
   is dropped again before the *enclosing function* returns ER, so a failed
   append does not leak a reference. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (PyObject *)(O);          \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while(0)
554
555 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)556 Pdata_poptuple(Pdata *self, Py_ssize_t start)
557 {
558 PyObject *tuple;
559 Py_ssize_t len, i, j;
560
561 if (start < self->fence) {
562 Pdata_stack_underflow(self);
563 return NULL;
564 }
565 len = Py_SIZE(self) - start;
566 tuple = PyTuple_New(len);
567 if (tuple == NULL)
568 return NULL;
569 for (i = start, j = 0; j < len; i++, j++)
570 PyTuple_SET_ITEM(tuple, j, self->data[i]);
571
572 Py_SIZE(self) = start;
573 return tuple;
574 }
575
576 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)577 Pdata_poplist(Pdata *self, Py_ssize_t start)
578 {
579 PyObject *list;
580 Py_ssize_t len, i, j;
581
582 len = Py_SIZE(self) - start;
583 list = PyList_New(len);
584 if (list == NULL)
585 return NULL;
586 for (i = start, j = 0; j < len; i++, j++)
587 PyList_SET_ITEM(list, j, self->data[i]);
588
589 Py_SIZE(self) = start;
590 return list;
591 }
592
593 typedef struct {
594 PyObject *me_key;
595 Py_ssize_t me_value;
596 } PyMemoEntry;
597
598 typedef struct {
599 size_t mt_mask;
600 size_t mt_used;
601 size_t mt_allocated;
602 PyMemoEntry *mt_table;
603 } PyMemoTable;
604
605 typedef struct PicklerObject {
606 PyObject_HEAD
607 PyMemoTable *memo; /* Memo table, keep track of the seen
608 objects to support self-referential objects
609 pickling. */
610 PyObject *pers_func; /* persistent_id() method, can be NULL */
611 PyObject *pers_func_self; /* borrowed reference to self if pers_func
612 is an unbound method, NULL otherwise */
613 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
614
615 PyObject *write; /* write() method of the output stream. */
616 PyObject *output_buffer; /* Write into a local bytearray buffer before
617 flushing to the stream. */
618 Py_ssize_t output_len; /* Length of output_buffer. */
619 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
620 int proto; /* Pickle protocol number, >= 0 */
621 int bin; /* Boolean, true if proto > 0 */
622 int framing; /* True when framing is enabled, proto >= 4 */
623 Py_ssize_t frame_start; /* Position in output_buffer where the
624 current frame begins. -1 if there
625 is no frame currently open. */
626
627 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
628 int fast; /* Enable fast mode if set to a true value.
629 The fast mode disable the usage of memo,
630 therefore speeding the pickling process by
631 not generating superfluous PUT opcodes. It
632 should not be used if with self-referential
633 objects. */
634 int fast_nesting;
635 int fix_imports; /* Indicate whether Pickler should fix
636 the name of globals for Python 2.x. */
637 PyObject *fast_memo;
638 } PicklerObject;
639
640 typedef struct UnpicklerObject {
641 PyObject_HEAD
642 Pdata *stack; /* Pickle data stack, store unpickled objects. */
643
644 /* The unpickler memo is just an array of PyObject *s. Using a dict
645 is unnecessary, since the keys are contiguous ints. */
646 PyObject **memo;
647 size_t memo_size; /* Capacity of the memo array */
648 size_t memo_len; /* Number of objects in the memo */
649
650 PyObject *pers_func; /* persistent_load() method, can be NULL. */
651 PyObject *pers_func_self; /* borrowed reference to self if pers_func
652 is an unbound method, NULL otherwise */
653
654 Py_buffer buffer;
655 char *input_buffer;
656 char *input_line;
657 Py_ssize_t input_len;
658 Py_ssize_t next_read_idx;
659 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
660
661 PyObject *read; /* read() method of the input stream. */
662 PyObject *readline; /* readline() method of the input stream. */
663 PyObject *peek; /* peek() method of the input stream, or NULL */
664
665 char *encoding; /* Name of the encoding to be used for
666 decoding strings pickled using Python
667 2.x. The default value is "ASCII" */
668 char *errors; /* Name of errors handling scheme to used when
669 decoding strings. The default value is
670 "strict". */
671 Py_ssize_t *marks; /* Mark stack, used for unpickling container
672 objects. */
673 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
674 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
675 int proto; /* Protocol of the pickle loaded. */
676 int fix_imports; /* Indicate whether Unpickler should fix
677 the name of globals pickled by Python 2.x. */
678 } UnpicklerObject;
679
680 typedef struct {
681 PyObject_HEAD
682 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
683 } PicklerMemoProxyObject;
684
685 typedef struct {
686 PyObject_HEAD
687 UnpicklerObject *unpickler;
688 } UnpicklerMemoProxyObject;
689
690 /* Forward declarations */
691 static int save(PicklerObject *, PyObject *, int);
692 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
693 static PyTypeObject Pickler_Type;
694 static PyTypeObject Unpickler_Type;
695
696 #include "clinic/_pickle.c.h"
697
698 /*************************************************************************
699 A custom hashtable mapping void* to Python ints. This is used by the pickler
700 for memoization. Using a custom hashtable rather than PyDict allows us to skip
701 a bunch of unnecessary object creation. This makes a huge performance
702 difference. */
703
/* Smallest number of slots a memo table ever has. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right by after each
   probe in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
706
707
708 static PyMemoTable *
PyMemoTable_New(void)709 PyMemoTable_New(void)
710 {
711 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
712 if (memo == NULL) {
713 PyErr_NoMemory();
714 return NULL;
715 }
716
717 memo->mt_used = 0;
718 memo->mt_allocated = MT_MINSIZE;
719 memo->mt_mask = MT_MINSIZE - 1;
720 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
721 if (memo->mt_table == NULL) {
722 PyMem_FREE(memo);
723 PyErr_NoMemory();
724 return NULL;
725 }
726 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
727
728 return memo;
729 }
730
731 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)732 PyMemoTable_Copy(PyMemoTable *self)
733 {
734 PyMemoTable *new = PyMemoTable_New();
735 if (new == NULL)
736 return NULL;
737
738 new->mt_used = self->mt_used;
739 new->mt_allocated = self->mt_allocated;
740 new->mt_mask = self->mt_mask;
741 /* The table we get from _New() is probably smaller than we wanted.
742 Free it and allocate one that's the right size. */
743 PyMem_FREE(new->mt_table);
744 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
745 if (new->mt_table == NULL) {
746 PyMem_FREE(new);
747 PyErr_NoMemory();
748 return NULL;
749 }
750 for (size_t i = 0; i < self->mt_allocated; i++) {
751 Py_XINCREF(self->mt_table[i].me_key);
752 }
753 memcpy(new->mt_table, self->mt_table,
754 sizeof(PyMemoEntry) * self->mt_allocated);
755
756 return new;
757 }
758
759 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)760 PyMemoTable_Size(PyMemoTable *self)
761 {
762 return self->mt_used;
763 }
764
765 static int
PyMemoTable_Clear(PyMemoTable * self)766 PyMemoTable_Clear(PyMemoTable *self)
767 {
768 Py_ssize_t i = self->mt_allocated;
769
770 while (--i >= 0) {
771 Py_XDECREF(self->mt_table[i].me_key);
772 }
773 self->mt_used = 0;
774 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
775 return 0;
776 }
777
778 static void
PyMemoTable_Del(PyMemoTable * self)779 PyMemoTable_Del(PyMemoTable *self)
780 {
781 if (self == NULL)
782 return;
783 PyMemoTable_Clear(self);
784
785 PyMem_FREE(self->mt_table);
786 PyMem_FREE(self);
787 }
788
789 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
790 can be considerably simpler than dictobject.c's lookdict(). */
791 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)792 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
793 {
794 size_t i;
795 size_t perturb;
796 size_t mask = self->mt_mask;
797 PyMemoEntry *table = self->mt_table;
798 PyMemoEntry *entry;
799 Py_hash_t hash = (Py_hash_t)key >> 3;
800
801 i = hash & mask;
802 entry = &table[i];
803 if (entry->me_key == NULL || entry->me_key == key)
804 return entry;
805
806 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
807 i = (i << 2) + i + perturb + 1;
808 entry = &table[i & mask];
809 if (entry->me_key == NULL || entry->me_key == key)
810 return entry;
811 }
812 Py_UNREACHABLE();
813 }
814
815 /* Returns -1 on failure, 0 on success. */
816 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)817 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
818 {
819 PyMemoEntry *oldtable = NULL;
820 PyMemoEntry *oldentry, *newentry;
821 size_t new_size = MT_MINSIZE;
822 size_t to_process;
823
824 assert(min_size > 0);
825
826 if (min_size > PY_SSIZE_T_MAX) {
827 PyErr_NoMemory();
828 return -1;
829 }
830
831 /* Find the smallest valid table size >= min_size. */
832 while (new_size < min_size) {
833 new_size <<= 1;
834 }
835 /* new_size needs to be a power of two. */
836 assert((new_size & (new_size - 1)) == 0);
837
838 /* Allocate new table. */
839 oldtable = self->mt_table;
840 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
841 if (self->mt_table == NULL) {
842 self->mt_table = oldtable;
843 PyErr_NoMemory();
844 return -1;
845 }
846 self->mt_allocated = new_size;
847 self->mt_mask = new_size - 1;
848 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
849
850 /* Copy entries from the old table. */
851 to_process = self->mt_used;
852 for (oldentry = oldtable; to_process > 0; oldentry++) {
853 if (oldentry->me_key != NULL) {
854 to_process--;
855 /* newentry is a pointer to a chunk of the new
856 mt_table, so we're setting the key:value pair
857 in-place. */
858 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
859 newentry->me_key = oldentry->me_key;
860 newentry->me_value = oldentry->me_value;
861 }
862 }
863
864 /* Deallocate the old table. */
865 PyMem_FREE(oldtable);
866 return 0;
867 }
868
869 /* Returns NULL on failure, a pointer to the value otherwise. */
870 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)871 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
872 {
873 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
874 if (entry->me_key == NULL)
875 return NULL;
876 return &entry->me_value;
877 }
878
879 /* Returns -1 on failure, 0 on success. */
880 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)881 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
882 {
883 PyMemoEntry *entry;
884
885 assert(key != NULL);
886
887 entry = _PyMemoTable_Lookup(self, key);
888 if (entry->me_key != NULL) {
889 entry->me_value = value;
890 return 0;
891 }
892 Py_INCREF(key);
893 entry->me_key = key;
894 entry->me_value = value;
895 self->mt_used++;
896
897 /* If we added a key, we can safely resize. Otherwise just return!
898 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
899 *
900 * Quadrupling the size improves average table sparseness
901 * (reducing collisions) at the cost of some memory. It also halves
902 * the number of expensive resize operations in a growing memo table.
903 *
904 * Very large memo tables (over 50K items) use doubling instead.
905 * This may help applications with severe memory constraints.
906 */
907 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
908 return 0;
909 }
910 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
911 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
912 return _PyMemoTable_ResizeTable(self, desired_size);
913 }
914
915 #undef MT_MINSIZE
916 #undef PERTURB_SHIFT
917
918 /*************************************************************************/
919
920
921 static int
_Pickler_ClearBuffer(PicklerObject * self)922 _Pickler_ClearBuffer(PicklerObject *self)
923 {
924 Py_XSETREF(self->output_buffer,
925 PyBytes_FromStringAndSize(NULL, self->max_output_len));
926 if (self->output_buffer == NULL)
927 return -1;
928 self->output_len = 0;
929 self->frame_start = -1;
930 return 0;
931 }
932
/* Store `value` into out[0..7] as an 8-byte little-endian integer.  Bytes
   beyond the width of size_t are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    for (size_t pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        }
        else {
            out[pos] = 0;
        }
    }
}
947
948 static int
_Pickler_CommitFrame(PicklerObject * self)949 _Pickler_CommitFrame(PicklerObject *self)
950 {
951 size_t frame_len;
952 char *qdata;
953
954 if (!self->framing || self->frame_start == -1)
955 return 0;
956 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
957 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
958 if (frame_len >= FRAME_SIZE_MIN) {
959 qdata[0] = FRAME;
960 _write_size64(qdata + 1, frame_len);
961 }
962 else {
963 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
964 self->output_len -= FRAME_HEADER_SIZE;
965 }
966 self->frame_start = -1;
967 return 0;
968 }
969
970 static PyObject *
_Pickler_GetString(PicklerObject * self)971 _Pickler_GetString(PicklerObject *self)
972 {
973 PyObject *output_buffer = self->output_buffer;
974
975 assert(self->output_buffer != NULL);
976
977 if (_Pickler_CommitFrame(self))
978 return NULL;
979
980 self->output_buffer = NULL;
981 /* Resize down to exact size */
982 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
983 return NULL;
984 return output_buffer;
985 }
986
987 static int
_Pickler_FlushToFile(PicklerObject * self)988 _Pickler_FlushToFile(PicklerObject *self)
989 {
990 PyObject *output, *result;
991
992 assert(self->write != NULL);
993
994 /* This will commit the frame first */
995 output = _Pickler_GetString(self);
996 if (output == NULL)
997 return -1;
998
999 result = _Pickle_FastCall(self->write, output);
1000 Py_XDECREF(result);
1001 return (result == NULL) ? -1 : 0;
1002 }
1003
1004 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1005 _Pickler_OpcodeBoundary(PicklerObject *self)
1006 {
1007 Py_ssize_t frame_len;
1008
1009 if (!self->framing || self->frame_start == -1) {
1010 return 0;
1011 }
1012 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1013 if (frame_len >= FRAME_SIZE_TARGET) {
1014 if(_Pickler_CommitFrame(self)) {
1015 return -1;
1016 }
1017 /* Flush the content of the committed frame to the underlying
1018 * file and reuse the pickler buffer for the next frame so as
1019 * to limit memory usage when dumping large complex objects to
1020 * a file.
1021 *
1022 * self->write is NULL when called via dumps.
1023 */
1024 if (self->write != NULL) {
1025 if (_Pickler_FlushToFile(self) < 0) {
1026 return -1;
1027 }
1028 if (_Pickler_ClearBuffer(self) < 0) {
1029 return -1;
1030 }
1031 }
1032 }
1033 return 0;
1034 }
1035
1036 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1037 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1038 {
1039 Py_ssize_t i, n, required;
1040 char *buffer;
1041 int need_new_frame;
1042
1043 assert(s != NULL);
1044 need_new_frame = (self->framing && self->frame_start == -1);
1045
1046 if (need_new_frame)
1047 n = data_len + FRAME_HEADER_SIZE;
1048 else
1049 n = data_len;
1050
1051 required = self->output_len + n;
1052 if (required > self->max_output_len) {
1053 /* Make place in buffer for the pickle chunk */
1054 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1055 PyErr_NoMemory();
1056 return -1;
1057 }
1058 self->max_output_len = (self->output_len + n) / 2 * 3;
1059 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1060 return -1;
1061 }
1062 buffer = PyBytes_AS_STRING(self->output_buffer);
1063 if (need_new_frame) {
1064 /* Setup new frame */
1065 Py_ssize_t frame_start = self->output_len;
1066 self->frame_start = frame_start;
1067 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1068 /* Write an invalid value, for debugging */
1069 buffer[frame_start + i] = 0xFE;
1070 }
1071 self->output_len += FRAME_HEADER_SIZE;
1072 }
1073 if (data_len < 8) {
1074 /* This is faster than memcpy when the string is short. */
1075 for (i = 0; i < data_len; i++) {
1076 buffer[self->output_len + i] = s[i];
1077 }
1078 }
1079 else {
1080 memcpy(buffer + self->output_len, s, data_len);
1081 }
1082 self->output_len += data_len;
1083 return data_len;
1084 }
1085
1086 static PicklerObject *
_Pickler_New(void)1087 _Pickler_New(void)
1088 {
1089 PicklerObject *self;
1090
1091 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1092 if (self == NULL)
1093 return NULL;
1094
1095 self->pers_func = NULL;
1096 self->dispatch_table = NULL;
1097 self->write = NULL;
1098 self->proto = 0;
1099 self->bin = 0;
1100 self->framing = 0;
1101 self->frame_start = -1;
1102 self->fast = 0;
1103 self->fast_nesting = 0;
1104 self->fix_imports = 0;
1105 self->fast_memo = NULL;
1106 self->max_output_len = WRITE_BUF_SIZE;
1107 self->output_len = 0;
1108
1109 self->memo = PyMemoTable_New();
1110 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1111 self->max_output_len);
1112
1113 if (self->memo == NULL || self->output_buffer == NULL) {
1114 Py_DECREF(self);
1115 return NULL;
1116 }
1117
1118 PyObject_GC_Track(self);
1119 return self;
1120 }
1121
1122 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1123 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1124 {
1125 long proto;
1126
1127 if (protocol == NULL || protocol == Py_None) {
1128 proto = DEFAULT_PROTOCOL;
1129 }
1130 else {
1131 proto = PyLong_AsLong(protocol);
1132 if (proto < 0) {
1133 if (proto == -1 && PyErr_Occurred())
1134 return -1;
1135 proto = HIGHEST_PROTOCOL;
1136 }
1137 else if (proto > HIGHEST_PROTOCOL) {
1138 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1139 HIGHEST_PROTOCOL);
1140 return -1;
1141 }
1142 }
1143 self->proto = (int)proto;
1144 self->bin = proto > 0;
1145 self->fix_imports = fix_imports && proto < 3;
1146 return 0;
1147 }
1148
1149 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1150 be called once on a freshly created Pickler. */
1151 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1152 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1153 {
1154 _Py_IDENTIFIER(write);
1155 assert(file != NULL);
1156 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1157 return -1;
1158 }
1159 if (self->write == NULL) {
1160 PyErr_SetString(PyExc_TypeError,
1161 "file must have a 'write' attribute");
1162 return -1;
1163 }
1164
1165 return 0;
1166 }
1167
1168 /* Returns the size of the input on success, -1 on failure. This takes its
1169 own reference to `input`. */
1170 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1171 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1172 {
1173 if (self->buffer.buf != NULL)
1174 PyBuffer_Release(&self->buffer);
1175 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1176 return -1;
1177 self->input_buffer = self->buffer.buf;
1178 self->input_len = self->buffer.len;
1179 self->next_read_idx = 0;
1180 self->prefetched_idx = self->input_len;
1181 return self->input_len;
1182 }
1183
1184 static int
bad_readline(void)1185 bad_readline(void)
1186 {
1187 PickleState *st = _Pickle_GetGlobalState();
1188 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1189 return -1;
1190 }
1191
1192 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1193 _Unpickler_SkipConsumed(UnpicklerObject *self)
1194 {
1195 Py_ssize_t consumed;
1196 PyObject *r;
1197
1198 consumed = self->next_read_idx - self->prefetched_idx;
1199 if (consumed <= 0)
1200 return 0;
1201
1202 assert(self->peek); /* otherwise we did something wrong */
1203 /* This makes a useless copy... */
1204 r = PyObject_CallFunction(self->read, "n", consumed);
1205 if (r == NULL)
1206 return -1;
1207 Py_DECREF(r);
1208
1209 self->prefetched_idx = self->next_read_idx;
1210 return 0;
1211 }
1212
1213 static const Py_ssize_t READ_WHOLE_LINE = -1;
1214
1215 /* If reading from a file, we need to only pull the bytes we need, since there
1216 may be multiple pickle objects arranged contiguously in the same input
1217 buffer.
1218
1219 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1220 bytes from the input stream/buffer.
1221
1222 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1223 failure; on success, returns the number of bytes read from the file.
1224
1225 On success, self->input_len will be 0; this is intentional so that when
1226 unpickling from a file, the "we've run out of data" code paths will trigger,
1227 causing the Unpickler to go back to the file for more data. Use the returned
1228 size to tell you how much data you can process. */
1229 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1230 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1231 {
1232 PyObject *data;
1233 Py_ssize_t read_size;
1234
1235 assert(self->read != NULL);
1236
1237 if (_Unpickler_SkipConsumed(self) < 0)
1238 return -1;
1239
1240 if (n == READ_WHOLE_LINE) {
1241 data = _PyObject_CallNoArg(self->readline);
1242 }
1243 else {
1244 PyObject *len;
1245 /* Prefetch some data without advancing the file pointer, if possible */
1246 if (self->peek && n < PREFETCH) {
1247 len = PyLong_FromSsize_t(PREFETCH);
1248 if (len == NULL)
1249 return -1;
1250 data = _Pickle_FastCall(self->peek, len);
1251 if (data == NULL) {
1252 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1253 return -1;
1254 /* peek() is probably not supported by the given file object */
1255 PyErr_Clear();
1256 Py_CLEAR(self->peek);
1257 }
1258 else {
1259 read_size = _Unpickler_SetStringInput(self, data);
1260 Py_DECREF(data);
1261 self->prefetched_idx = 0;
1262 if (n <= read_size)
1263 return n;
1264 }
1265 }
1266 len = PyLong_FromSsize_t(n);
1267 if (len == NULL)
1268 return -1;
1269 data = _Pickle_FastCall(self->read, len);
1270 }
1271 if (data == NULL)
1272 return -1;
1273
1274 read_size = _Unpickler_SetStringInput(self, data);
1275 Py_DECREF(data);
1276 return read_size;
1277 }
1278
1279 /* Don't call it directly: use _Unpickler_Read() */
1280 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1281 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1282 {
1283 Py_ssize_t num_read;
1284
1285 *s = NULL;
1286 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1287 PickleState *st = _Pickle_GetGlobalState();
1288 PyErr_SetString(st->UnpicklingError,
1289 "read would overflow (invalid bytecode)");
1290 return -1;
1291 }
1292
1293 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1294 assert(self->next_read_idx + n > self->input_len);
1295
1296 if (!self->read)
1297 return bad_readline();
1298
1299 num_read = _Unpickler_ReadFromFile(self, n);
1300 if (num_read < 0)
1301 return -1;
1302 if (num_read < n)
1303 return bad_readline();
1304 *s = self->input_buffer;
1305 self->next_read_idx = n;
1306 return n;
1307 }
1308
/* Read `n` bytes from the unpickler's data source, storing a pointer to them
   in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly, because it also deals with input
   streams.

   Fast path: when at least `n` unread bytes remain in the input buffer,
   `*s` is pointed at input_buffer + next_read_idx, next_read_idx is advanced
   by `n` and the expression evaluates to `n`.  Otherwise the work is
   delegated to _Unpickler_ReadImpl(), which on success leaves next_read_idx
   equal to `n`.

   This is a macro: `self`, `s` and `n` are evaluated more than once, so
   pass expressions without side effects.

   Evaluates to -1 (with an exception set) on failure.  On success, evaluates
   to the number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1328
1329 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1330 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1331 char **result)
1332 {
1333 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1334 if (input_line == NULL) {
1335 PyErr_NoMemory();
1336 return -1;
1337 }
1338
1339 memcpy(input_line, line, len);
1340 input_line[len] = '\0';
1341 self->input_line = input_line;
1342 *result = self->input_line;
1343 return len;
1344 }
1345
1346 /* Read a line from the input stream/buffer. If we run off the end of the input
1347 before hitting \n, raise an error.
1348
1349 Returns the number of chars read, or -1 on failure. */
1350 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1351 _Unpickler_Readline(UnpicklerObject *self, char **result)
1352 {
1353 Py_ssize_t i, num_read;
1354
1355 for (i = self->next_read_idx; i < self->input_len; i++) {
1356 if (self->input_buffer[i] == '\n') {
1357 char *line_start = self->input_buffer + self->next_read_idx;
1358 num_read = i - self->next_read_idx + 1;
1359 self->next_read_idx = i + 1;
1360 return _Unpickler_CopyLine(self, line_start, num_read, result);
1361 }
1362 }
1363 if (!self->read)
1364 return bad_readline();
1365
1366 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1367 if (num_read < 0)
1368 return -1;
1369 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1370 return bad_readline();
1371 self->next_read_idx = num_read;
1372 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1373 }
1374
1375 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1376 will be modified in place. */
1377 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1378 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1379 {
1380 size_t i;
1381
1382 assert(new_size > self->memo_size);
1383
1384 PyObject **memo_new = self->memo;
1385 PyMem_RESIZE(memo_new, PyObject *, new_size);
1386 if (memo_new == NULL) {
1387 PyErr_NoMemory();
1388 return -1;
1389 }
1390 self->memo = memo_new;
1391 for (i = self->memo_size; i < new_size; i++)
1392 self->memo[i] = NULL;
1393 self->memo_size = new_size;
1394 return 0;
1395 }
1396
1397 /* Returns NULL if idx is out of bounds. */
1398 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1399 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1400 {
1401 if (idx >= self->memo_size)
1402 return NULL;
1403
1404 return self->memo[idx];
1405 }
1406
1407 /* Returns -1 (with an exception set) on failure, 0 on success.
1408 This takes its own reference to `value`. */
1409 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1410 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1411 {
1412 PyObject *old_item;
1413
1414 if (idx >= self->memo_size) {
1415 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1416 return -1;
1417 assert(idx < self->memo_size);
1418 }
1419 Py_INCREF(value);
1420 old_item = self->memo[idx];
1421 self->memo[idx] = value;
1422 if (old_item != NULL) {
1423 Py_DECREF(old_item);
1424 }
1425 else {
1426 self->memo_len++;
1427 }
1428 return 0;
1429 }
1430
1431 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1432 _Unpickler_NewMemo(Py_ssize_t new_size)
1433 {
1434 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1435 if (memo == NULL) {
1436 PyErr_NoMemory();
1437 return NULL;
1438 }
1439 memset(memo, 0, new_size * sizeof(PyObject *));
1440 return memo;
1441 }
1442
1443 /* Free the unpickler's memo, taking care to decref any items left in it. */
1444 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1445 _Unpickler_MemoCleanup(UnpicklerObject *self)
1446 {
1447 Py_ssize_t i;
1448 PyObject **memo = self->memo;
1449
1450 if (self->memo == NULL)
1451 return;
1452 self->memo = NULL;
1453 i = self->memo_size;
1454 while (--i >= 0) {
1455 Py_XDECREF(memo[i]);
1456 }
1457 PyMem_FREE(memo);
1458 }
1459
1460 static UnpicklerObject *
_Unpickler_New(void)1461 _Unpickler_New(void)
1462 {
1463 UnpicklerObject *self;
1464
1465 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1466 if (self == NULL)
1467 return NULL;
1468
1469 self->pers_func = NULL;
1470 self->input_buffer = NULL;
1471 self->input_line = NULL;
1472 self->input_len = 0;
1473 self->next_read_idx = 0;
1474 self->prefetched_idx = 0;
1475 self->read = NULL;
1476 self->readline = NULL;
1477 self->peek = NULL;
1478 self->encoding = NULL;
1479 self->errors = NULL;
1480 self->marks = NULL;
1481 self->num_marks = 0;
1482 self->marks_size = 0;
1483 self->proto = 0;
1484 self->fix_imports = 0;
1485 memset(&self->buffer, 0, sizeof(Py_buffer));
1486 self->memo_size = 32;
1487 self->memo_len = 0;
1488 self->memo = _Unpickler_NewMemo(self->memo_size);
1489 self->stack = (Pdata *)Pdata_New();
1490
1491 if (self->memo == NULL || self->stack == NULL) {
1492 Py_DECREF(self);
1493 return NULL;
1494 }
1495
1496 PyObject_GC_Track(self);
1497 return self;
1498 }
1499
1500 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1501 be called once on a freshly created Pickler. */
1502 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1503 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1504 {
1505 _Py_IDENTIFIER(peek);
1506 _Py_IDENTIFIER(read);
1507 _Py_IDENTIFIER(readline);
1508
1509 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1510 return -1;
1511 }
1512 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1513 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1514 if (self->readline == NULL || self->read == NULL) {
1515 if (!PyErr_Occurred()) {
1516 PyErr_SetString(PyExc_TypeError,
1517 "file must have 'read' and 'readline' attributes");
1518 }
1519 Py_CLEAR(self->read);
1520 Py_CLEAR(self->readline);
1521 Py_CLEAR(self->peek);
1522 return -1;
1523 }
1524 return 0;
1525 }
1526
1527 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1528 be called once on a freshly created Pickler. */
1529 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1530 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1531 const char *encoding,
1532 const char *errors)
1533 {
1534 if (encoding == NULL)
1535 encoding = "ASCII";
1536 if (errors == NULL)
1537 errors = "strict";
1538
1539 self->encoding = _PyMem_Strdup(encoding);
1540 self->errors = _PyMem_Strdup(errors);
1541 if (self->encoding == NULL || self->errors == NULL) {
1542 PyErr_NoMemory();
1543 return -1;
1544 }
1545 return 0;
1546 }
1547
1548 /* Generate a GET opcode for an object stored in the memo. */
1549 static int
memo_get(PicklerObject * self,PyObject * key)1550 memo_get(PicklerObject *self, PyObject *key)
1551 {
1552 Py_ssize_t *value;
1553 char pdata[30];
1554 Py_ssize_t len;
1555
1556 value = PyMemoTable_Get(self->memo, key);
1557 if (value == NULL) {
1558 PyErr_SetObject(PyExc_KeyError, key);
1559 return -1;
1560 }
1561
1562 if (!self->bin) {
1563 pdata[0] = GET;
1564 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1565 "%" PY_FORMAT_SIZE_T "d\n", *value);
1566 len = strlen(pdata);
1567 }
1568 else {
1569 if (*value < 256) {
1570 pdata[0] = BINGET;
1571 pdata[1] = (unsigned char)(*value & 0xff);
1572 len = 2;
1573 }
1574 else if ((size_t)*value <= 0xffffffffUL) {
1575 pdata[0] = LONG_BINGET;
1576 pdata[1] = (unsigned char)(*value & 0xff);
1577 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1578 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1579 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1580 len = 5;
1581 }
1582 else { /* unlikely */
1583 PickleState *st = _Pickle_GetGlobalState();
1584 PyErr_SetString(st->PicklingError,
1585 "memo id too large for LONG_BINGET");
1586 return -1;
1587 }
1588 }
1589
1590 if (_Pickler_Write(self, pdata, len) < 0)
1591 return -1;
1592
1593 return 0;
1594 }
1595
1596 /* Store an object in the memo, assign it a new unique ID based on the number
1597 of objects currently stored in the memo and generate a PUT opcode. */
1598 static int
memo_put(PicklerObject * self,PyObject * obj)1599 memo_put(PicklerObject *self, PyObject *obj)
1600 {
1601 char pdata[30];
1602 Py_ssize_t len;
1603 Py_ssize_t idx;
1604
1605 const char memoize_op = MEMOIZE;
1606
1607 if (self->fast)
1608 return 0;
1609
1610 idx = PyMemoTable_Size(self->memo);
1611 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1612 return -1;
1613
1614 if (self->proto >= 4) {
1615 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1616 return -1;
1617 return 0;
1618 }
1619 else if (!self->bin) {
1620 pdata[0] = PUT;
1621 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1622 "%" PY_FORMAT_SIZE_T "d\n", idx);
1623 len = strlen(pdata);
1624 }
1625 else {
1626 if (idx < 256) {
1627 pdata[0] = BINPUT;
1628 pdata[1] = (unsigned char)idx;
1629 len = 2;
1630 }
1631 else if ((size_t)idx <= 0xffffffffUL) {
1632 pdata[0] = LONG_BINPUT;
1633 pdata[1] = (unsigned char)(idx & 0xff);
1634 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1635 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1636 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1637 len = 5;
1638 }
1639 else { /* unlikely */
1640 PickleState *st = _Pickle_GetGlobalState();
1641 PyErr_SetString(st->PicklingError,
1642 "memo id too large for LONG_BINPUT");
1643 return -1;
1644 }
1645 }
1646 if (_Pickler_Write(self, pdata, len) < 0)
1647 return -1;
1648
1649 return 0;
1650 }
1651
1652 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1653 get_dotted_path(PyObject *obj, PyObject *name)
1654 {
1655 _Py_static_string(PyId_dot, ".");
1656 PyObject *dotted_path;
1657 Py_ssize_t i, n;
1658
1659 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1660 if (dotted_path == NULL)
1661 return NULL;
1662 n = PyList_GET_SIZE(dotted_path);
1663 assert(n >= 1);
1664 for (i = 0; i < n; i++) {
1665 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1666 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1667 if (obj == NULL)
1668 PyErr_Format(PyExc_AttributeError,
1669 "Can't pickle local object %R", name);
1670 else
1671 PyErr_Format(PyExc_AttributeError,
1672 "Can't pickle local attribute %R on %R", name, obj);
1673 Py_DECREF(dotted_path);
1674 return NULL;
1675 }
1676 }
1677 return dotted_path;
1678 }
1679
1680 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1681 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1682 {
1683 Py_ssize_t i, n;
1684 PyObject *parent = NULL;
1685
1686 assert(PyList_CheckExact(names));
1687 Py_INCREF(obj);
1688 n = PyList_GET_SIZE(names);
1689 for (i = 0; i < n; i++) {
1690 PyObject *name = PyList_GET_ITEM(names, i);
1691 Py_XDECREF(parent);
1692 parent = obj;
1693 (void)_PyObject_LookupAttr(parent, name, &obj);
1694 if (obj == NULL) {
1695 Py_DECREF(parent);
1696 return NULL;
1697 }
1698 }
1699 if (pparent != NULL)
1700 *pparent = parent;
1701 else
1702 Py_XDECREF(parent);
1703 return obj;
1704 }
1705
1706
1707 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1708 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1709 {
1710 PyObject *dotted_path, *attr;
1711
1712 if (allow_qualname) {
1713 dotted_path = get_dotted_path(obj, name);
1714 if (dotted_path == NULL)
1715 return NULL;
1716 attr = get_deep_attribute(obj, dotted_path, NULL);
1717 Py_DECREF(dotted_path);
1718 }
1719 else {
1720 (void)_PyObject_LookupAttr(obj, name, &attr);
1721 }
1722 if (attr == NULL && !PyErr_Occurred()) {
1723 PyErr_Format(PyExc_AttributeError,
1724 "Can't get attribute %R on %R", name, obj);
1725 }
1726 return attr;
1727 }
1728
1729 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1730 _checkmodule(PyObject *module_name, PyObject *module,
1731 PyObject *global, PyObject *dotted_path)
1732 {
1733 if (module == Py_None) {
1734 return -1;
1735 }
1736 if (PyUnicode_Check(module_name) &&
1737 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1738 return -1;
1739 }
1740
1741 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1742 if (candidate == NULL) {
1743 return -1;
1744 }
1745 if (candidate != global) {
1746 Py_DECREF(candidate);
1747 return -1;
1748 }
1749 Py_DECREF(candidate);
1750 return 0;
1751 }
1752
1753 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1754 whichmodule(PyObject *global, PyObject *dotted_path)
1755 {
1756 PyObject *module_name;
1757 PyObject *module = NULL;
1758 Py_ssize_t i;
1759 PyObject *modules;
1760 _Py_IDENTIFIER(__module__);
1761 _Py_IDENTIFIER(modules);
1762 _Py_IDENTIFIER(__main__);
1763
1764 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1765 return NULL;
1766 }
1767 if (module_name) {
1768 /* In some rare cases (e.g., bound methods of extension types),
1769 __module__ can be None. If it is so, then search sys.modules for
1770 the module of global. */
1771 if (module_name != Py_None)
1772 return module_name;
1773 Py_CLEAR(module_name);
1774 }
1775 assert(module_name == NULL);
1776
1777 /* Fallback on walking sys.modules */
1778 modules = _PySys_GetObjectId(&PyId_modules);
1779 if (modules == NULL) {
1780 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1781 return NULL;
1782 }
1783 if (PyDict_CheckExact(modules)) {
1784 i = 0;
1785 while (PyDict_Next(modules, &i, &module_name, &module)) {
1786 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1787 Py_INCREF(module_name);
1788 return module_name;
1789 }
1790 if (PyErr_Occurred()) {
1791 return NULL;
1792 }
1793 }
1794 }
1795 else {
1796 PyObject *iterator = PyObject_GetIter(modules);
1797 if (iterator == NULL) {
1798 return NULL;
1799 }
1800 while ((module_name = PyIter_Next(iterator))) {
1801 module = PyObject_GetItem(modules, module_name);
1802 if (module == NULL) {
1803 Py_DECREF(module_name);
1804 Py_DECREF(iterator);
1805 return NULL;
1806 }
1807 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1808 Py_DECREF(module);
1809 Py_DECREF(iterator);
1810 return module_name;
1811 }
1812 Py_DECREF(module);
1813 Py_DECREF(module_name);
1814 if (PyErr_Occurred()) {
1815 Py_DECREF(iterator);
1816 return NULL;
1817 }
1818 }
1819 Py_DECREF(iterator);
1820 }
1821
1822 /* If no module is found, use __main__. */
1823 module_name = _PyUnicode_FromId(&PyId___main__);
1824 Py_XINCREF(module_name);
1825 return module_name;
1826 }
1827
1828 /* fast_save_enter() and fast_save_leave() are guards against recursive
1829 objects when Pickler is used with the "fast mode" (i.e., with object
1830 memoization disabled). If the nesting of a list or dict object exceed
1831 FAST_NESTING_LIMIT, these guards will start keeping an internal
1832 reference to the seen list or dict objects and check whether these objects
1833 are recursive. These are not strictly necessary, since save() has a
1834 hard-coded recursion limit, but they give a nicer error message than the
1835 typical RuntimeError. */
1836 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1837 fast_save_enter(PicklerObject *self, PyObject *obj)
1838 {
1839 /* if fast_nesting < 0, we're doing an error exit. */
1840 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1841 PyObject *key = NULL;
1842 if (self->fast_memo == NULL) {
1843 self->fast_memo = PyDict_New();
1844 if (self->fast_memo == NULL) {
1845 self->fast_nesting = -1;
1846 return 0;
1847 }
1848 }
1849 key = PyLong_FromVoidPtr(obj);
1850 if (key == NULL) {
1851 self->fast_nesting = -1;
1852 return 0;
1853 }
1854 if (PyDict_GetItemWithError(self->fast_memo, key)) {
1855 Py_DECREF(key);
1856 PyErr_Format(PyExc_ValueError,
1857 "fast mode: can't pickle cyclic objects "
1858 "including object type %.200s at %p",
1859 obj->ob_type->tp_name, obj);
1860 self->fast_nesting = -1;
1861 return 0;
1862 }
1863 if (PyErr_Occurred()) {
1864 Py_DECREF(key);
1865 self->fast_nesting = -1;
1866 return 0;
1867 }
1868 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1869 Py_DECREF(key);
1870 self->fast_nesting = -1;
1871 return 0;
1872 }
1873 Py_DECREF(key);
1874 }
1875 return 1;
1876 }
1877
1878 static int
fast_save_leave(PicklerObject * self,PyObject * obj)1879 fast_save_leave(PicklerObject *self, PyObject *obj)
1880 {
1881 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1882 PyObject *key = PyLong_FromVoidPtr(obj);
1883 if (key == NULL)
1884 return 0;
1885 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1886 Py_DECREF(key);
1887 return 0;
1888 }
1889 Py_DECREF(key);
1890 }
1891 return 1;
1892 }
1893
1894 static int
save_none(PicklerObject * self,PyObject * obj)1895 save_none(PicklerObject *self, PyObject *obj)
1896 {
1897 const char none_op = NONE;
1898 if (_Pickler_Write(self, &none_op, 1) < 0)
1899 return -1;
1900
1901 return 0;
1902 }
1903
1904 static int
save_bool(PicklerObject * self,PyObject * obj)1905 save_bool(PicklerObject *self, PyObject *obj)
1906 {
1907 if (self->proto >= 2) {
1908 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
1909 if (_Pickler_Write(self, &bool_op, 1) < 0)
1910 return -1;
1911 }
1912 else {
1913 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
1914 * so that unpicklers written before bools were introduced unpickle them
1915 * as ints, but unpicklers after can recognize that bools were intended.
1916 * Note that protocol 2 added direct ways to pickle bools.
1917 */
1918 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
1919 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
1920 return -1;
1921 }
1922 return 0;
1923 }
1924
1925 static int
save_long(PicklerObject * self,PyObject * obj)1926 save_long(PicklerObject *self, PyObject *obj)
1927 {
1928 PyObject *repr = NULL;
1929 Py_ssize_t size;
1930 long val;
1931 int overflow;
1932 int status = 0;
1933
1934 val= PyLong_AsLongAndOverflow(obj, &overflow);
1935 if (!overflow && (sizeof(long) <= 4 ||
1936 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
1937 {
1938 /* result fits in a signed 4-byte integer.
1939
1940 Note: we can't use -0x80000000L in the above condition because some
1941 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
1942 before applying the unary minus when sizeof(long) <= 4. The
1943 resulting value stays unsigned which is commonly not what we want,
1944 so MSVC happily warns us about it. However, that result would have
1945 been fine because we guard for sizeof(long) <= 4 which turns the
1946 condition true in that particular case. */
1947 char pdata[32];
1948 Py_ssize_t len = 0;
1949
1950 if (self->bin) {
1951 pdata[1] = (unsigned char)(val & 0xff);
1952 pdata[2] = (unsigned char)((val >> 8) & 0xff);
1953 pdata[3] = (unsigned char)((val >> 16) & 0xff);
1954 pdata[4] = (unsigned char)((val >> 24) & 0xff);
1955
1956 if ((pdata[4] != 0) || (pdata[3] != 0)) {
1957 pdata[0] = BININT;
1958 len = 5;
1959 }
1960 else if (pdata[2] != 0) {
1961 pdata[0] = BININT2;
1962 len = 3;
1963 }
1964 else {
1965 pdata[0] = BININT1;
1966 len = 2;
1967 }
1968 }
1969 else {
1970 sprintf(pdata, "%c%ld\n", INT, val);
1971 len = strlen(pdata);
1972 }
1973 if (_Pickler_Write(self, pdata, len) < 0)
1974 return -1;
1975
1976 return 0;
1977 }
1978 assert(!PyErr_Occurred());
1979
1980 if (self->proto >= 2) {
1981 /* Linear-time pickling. */
1982 size_t nbits;
1983 size_t nbytes;
1984 unsigned char *pdata;
1985 char header[5];
1986 int i;
1987 int sign = _PyLong_Sign(obj);
1988
1989 if (sign == 0) {
1990 header[0] = LONG1;
1991 header[1] = 0; /* It's 0 -- an empty bytestring. */
1992 if (_Pickler_Write(self, header, 2) < 0)
1993 goto error;
1994 return 0;
1995 }
1996 nbits = _PyLong_NumBits(obj);
1997 if (nbits == (size_t)-1 && PyErr_Occurred())
1998 goto error;
1999 /* How many bytes do we need? There are nbits >> 3 full
2000 * bytes of data, and nbits & 7 leftover bits. If there
2001 * are any leftover bits, then we clearly need another
2002 * byte. What's not so obvious is that we *probably*
2003 * need another byte even if there aren't any leftovers:
2004 * the most-significant bit of the most-significant byte
2005 * acts like a sign bit, and it's usually got a sense
2006 * opposite of the one we need. The exception is ints
2007 * of the form -(2**(8*j-1)) for j > 0. Such an int is
2008 * its own 256's-complement, so has the right sign bit
2009 * even without the extra byte. That's a pain to check
2010 * for in advance, though, so we always grab an extra
2011 * byte at the start, and cut it back later if possible.
2012 */
2013 nbytes = (nbits >> 3) + 1;
2014 if (nbytes > 0x7fffffffL) {
2015 PyErr_SetString(PyExc_OverflowError,
2016 "int too large to pickle");
2017 goto error;
2018 }
2019 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2020 if (repr == NULL)
2021 goto error;
2022 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2023 i = _PyLong_AsByteArray((PyLongObject *)obj,
2024 pdata, nbytes,
2025 1 /* little endian */ , 1 /* signed */ );
2026 if (i < 0)
2027 goto error;
2028 /* If the int is negative, this may be a byte more than
2029 * needed. This is so iff the MSB is all redundant sign
2030 * bits.
2031 */
2032 if (sign < 0 &&
2033 nbytes > 1 &&
2034 pdata[nbytes - 1] == 0xff &&
2035 (pdata[nbytes - 2] & 0x80) != 0) {
2036 nbytes--;
2037 }
2038
2039 if (nbytes < 256) {
2040 header[0] = LONG1;
2041 header[1] = (unsigned char)nbytes;
2042 size = 2;
2043 }
2044 else {
2045 header[0] = LONG4;
2046 size = (Py_ssize_t) nbytes;
2047 for (i = 1; i < 5; i++) {
2048 header[i] = (unsigned char)(size & 0xff);
2049 size >>= 8;
2050 }
2051 size = 5;
2052 }
2053 if (_Pickler_Write(self, header, size) < 0 ||
2054 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2055 goto error;
2056 }
2057 else {
2058 const char long_op = LONG;
2059 const char *string;
2060
2061 /* proto < 2: write the repr and newline. This is quadratic-time (in
2062 the number of digits), in both directions. We add a trailing 'L'
2063 to the repr, for compatibility with Python 2.x. */
2064
2065 repr = PyObject_Repr(obj);
2066 if (repr == NULL)
2067 goto error;
2068
2069 string = PyUnicode_AsUTF8AndSize(repr, &size);
2070 if (string == NULL)
2071 goto error;
2072
2073 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2074 _Pickler_Write(self, string, size) < 0 ||
2075 _Pickler_Write(self, "L\n", 2) < 0)
2076 goto error;
2077 }
2078
2079 if (0) {
2080 error:
2081 status = -1;
2082 }
2083 Py_XDECREF(repr);
2084
2085 return status;
2086 }
2087
2088 static int
save_float(PicklerObject * self,PyObject * obj)2089 save_float(PicklerObject *self, PyObject *obj)
2090 {
2091 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2092
2093 if (self->bin) {
2094 char pdata[9];
2095 pdata[0] = BINFLOAT;
2096 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2097 return -1;
2098 if (_Pickler_Write(self, pdata, 9) < 0)
2099 return -1;
2100 }
2101 else {
2102 int result = -1;
2103 char *buf = NULL;
2104 char op = FLOAT;
2105
2106 if (_Pickler_Write(self, &op, 1) < 0)
2107 goto done;
2108
2109 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2110 if (!buf) {
2111 PyErr_NoMemory();
2112 goto done;
2113 }
2114
2115 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2116 goto done;
2117
2118 if (_Pickler_Write(self, "\n", 1) < 0)
2119 goto done;
2120
2121 result = 0;
2122 done:
2123 PyMem_Free(buf);
2124 return result;
2125 }
2126
2127 return 0;
2128 }
2129
2130 /* Perform direct write of the header and payload of the binary object.
2131
2132 The large contiguous data is written directly into the underlying file
2133 object, bypassing the output_buffer of the Pickler. We intentionally
2134 do not insert a protocol 4 frame opcode to make it possible to optimize
2135 file.read calls in the loader.
2136 */
2137 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2138 _Pickler_write_bytes(PicklerObject *self,
2139 const char *header, Py_ssize_t header_size,
2140 const char *data, Py_ssize_t data_size,
2141 PyObject *payload)
2142 {
2143 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2144 int framing = self->framing;
2145
2146 if (bypass_buffer) {
2147 assert(self->output_buffer != NULL);
2148 /* Commit the previous frame. */
2149 if (_Pickler_CommitFrame(self)) {
2150 return -1;
2151 }
2152 /* Disable framing temporarily */
2153 self->framing = 0;
2154 }
2155
2156 if (_Pickler_Write(self, header, header_size) < 0) {
2157 return -1;
2158 }
2159
2160 if (bypass_buffer && self->write != NULL) {
2161 /* Bypass the in-memory buffer to directly stream large data
2162 into the underlying file object. */
2163 PyObject *result, *mem = NULL;
2164 /* Dump the output buffer to the file. */
2165 if (_Pickler_FlushToFile(self) < 0) {
2166 return -1;
2167 }
2168
2169 /* Stream write the payload into the file without going through the
2170 output buffer. */
2171 if (payload == NULL) {
2172 /* TODO: It would be better to use a memoryview with a linked
2173 original string if this is possible. */
2174 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2175 if (payload == NULL) {
2176 return -1;
2177 }
2178 }
2179 result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2180 Py_XDECREF(mem);
2181 if (result == NULL) {
2182 return -1;
2183 }
2184 Py_DECREF(result);
2185
2186 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2187 if (_Pickler_ClearBuffer(self) < 0) {
2188 return -1;
2189 }
2190 }
2191 else {
2192 if (_Pickler_Write(self, data, data_size) < 0) {
2193 return -1;
2194 }
2195 }
2196
2197 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2198 self->framing = framing;
2199
2200 return 0;
2201 }
2202
2203 static int
save_bytes(PicklerObject * self,PyObject * obj)2204 save_bytes(PicklerObject *self, PyObject *obj)
2205 {
2206 if (self->proto < 3) {
2207 /* Older pickle protocols do not have an opcode for pickling bytes
2208 objects. Therefore, we need to fake the copy protocol (i.e.,
2209 the __reduce__ method) to permit bytes object unpickling.
2210
2211 Here we use a hack to be compatible with Python 2. Since in Python
2212 2 'bytes' is just an alias for 'str' (which has different
2213 parameters than the actual bytes object), we use codecs.encode
2214 to create the appropriate 'str' object when unpickled using
2215 Python 2 *and* the appropriate 'bytes' object when unpickled
2216 using Python 3. Again this is a hack and we don't need to do this
2217 with newer protocols. */
2218 PyObject *reduce_value = NULL;
2219 int status;
2220
2221 if (PyBytes_GET_SIZE(obj) == 0) {
2222 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2223 }
2224 else {
2225 PickleState *st = _Pickle_GetGlobalState();
2226 PyObject *unicode_str =
2227 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2228 PyBytes_GET_SIZE(obj),
2229 "strict");
2230 _Py_IDENTIFIER(latin1);
2231
2232 if (unicode_str == NULL)
2233 return -1;
2234 reduce_value = Py_BuildValue("(O(OO))",
2235 st->codecs_encode, unicode_str,
2236 _PyUnicode_FromId(&PyId_latin1));
2237 Py_DECREF(unicode_str);
2238 }
2239
2240 if (reduce_value == NULL)
2241 return -1;
2242
2243 /* save_reduce() will memoize the object automatically. */
2244 status = save_reduce(self, reduce_value, obj);
2245 Py_DECREF(reduce_value);
2246 return status;
2247 }
2248 else {
2249 Py_ssize_t size;
2250 char header[9];
2251 Py_ssize_t len;
2252
2253 size = PyBytes_GET_SIZE(obj);
2254 if (size < 0)
2255 return -1;
2256
2257 if (size <= 0xff) {
2258 header[0] = SHORT_BINBYTES;
2259 header[1] = (unsigned char)size;
2260 len = 2;
2261 }
2262 else if ((size_t)size <= 0xffffffffUL) {
2263 header[0] = BINBYTES;
2264 header[1] = (unsigned char)(size & 0xff);
2265 header[2] = (unsigned char)((size >> 8) & 0xff);
2266 header[3] = (unsigned char)((size >> 16) & 0xff);
2267 header[4] = (unsigned char)((size >> 24) & 0xff);
2268 len = 5;
2269 }
2270 else if (self->proto >= 4) {
2271 header[0] = BINBYTES8;
2272 _write_size64(header + 1, size);
2273 len = 9;
2274 }
2275 else {
2276 PyErr_SetString(PyExc_OverflowError,
2277 "cannot serialize a bytes object larger than 4 GiB");
2278 return -1; /* string too large */
2279 }
2280
2281 if (_Pickler_write_bytes(self, header, len,
2282 PyBytes_AS_STRING(obj), size, obj) < 0)
2283 {
2284 return -1;
2285 }
2286
2287 if (memo_put(self, obj) < 0)
2288 return -1;
2289
2290 return 0;
2291 }
2292 }
2293
2294 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2295 backslash and newline characters to \uXXXX escapes. */
2296 static PyObject *
raw_unicode_escape(PyObject * obj)2297 raw_unicode_escape(PyObject *obj)
2298 {
2299 char *p;
2300 Py_ssize_t i, size;
2301 void *data;
2302 unsigned int kind;
2303 _PyBytesWriter writer;
2304
2305 if (PyUnicode_READY(obj))
2306 return NULL;
2307
2308 _PyBytesWriter_Init(&writer);
2309
2310 size = PyUnicode_GET_LENGTH(obj);
2311 data = PyUnicode_DATA(obj);
2312 kind = PyUnicode_KIND(obj);
2313
2314 p = _PyBytesWriter_Alloc(&writer, size);
2315 if (p == NULL)
2316 goto error;
2317 writer.overallocate = 1;
2318
2319 for (i=0; i < size; i++) {
2320 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2321 /* Map 32-bit characters to '\Uxxxxxxxx' */
2322 if (ch >= 0x10000) {
2323 /* -1: subtract 1 preallocated byte */
2324 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2325 if (p == NULL)
2326 goto error;
2327
2328 *p++ = '\\';
2329 *p++ = 'U';
2330 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2331 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2332 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2333 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2334 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2335 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2336 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2337 *p++ = Py_hexdigits[ch & 15];
2338 }
2339 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2340 else if (ch >= 256 ||
2341 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2342 ch == 0x1a)
2343 {
2344 /* -1: subtract 1 preallocated byte */
2345 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2346 if (p == NULL)
2347 goto error;
2348
2349 *p++ = '\\';
2350 *p++ = 'u';
2351 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2352 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2353 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2354 *p++ = Py_hexdigits[ch & 15];
2355 }
2356 /* Copy everything else as-is */
2357 else
2358 *p++ = (char) ch;
2359 }
2360
2361 return _PyBytesWriter_Finish(&writer, p);
2362
2363 error:
2364 _PyBytesWriter_Dealloc(&writer);
2365 return NULL;
2366 }
2367
2368 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2369 write_unicode_binary(PicklerObject *self, PyObject *obj)
2370 {
2371 char header[9];
2372 Py_ssize_t len;
2373 PyObject *encoded = NULL;
2374 Py_ssize_t size;
2375 const char *data;
2376
2377 if (PyUnicode_READY(obj))
2378 return -1;
2379
2380 data = PyUnicode_AsUTF8AndSize(obj, &size);
2381 if (data == NULL) {
2382 /* Issue #8383: for strings with lone surrogates, fallback on the
2383 "surrogatepass" error handler. */
2384 PyErr_Clear();
2385 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2386 if (encoded == NULL)
2387 return -1;
2388
2389 data = PyBytes_AS_STRING(encoded);
2390 size = PyBytes_GET_SIZE(encoded);
2391 }
2392
2393 assert(size >= 0);
2394 if (size <= 0xff && self->proto >= 4) {
2395 header[0] = SHORT_BINUNICODE;
2396 header[1] = (unsigned char)(size & 0xff);
2397 len = 2;
2398 }
2399 else if ((size_t)size <= 0xffffffffUL) {
2400 header[0] = BINUNICODE;
2401 header[1] = (unsigned char)(size & 0xff);
2402 header[2] = (unsigned char)((size >> 8) & 0xff);
2403 header[3] = (unsigned char)((size >> 16) & 0xff);
2404 header[4] = (unsigned char)((size >> 24) & 0xff);
2405 len = 5;
2406 }
2407 else if (self->proto >= 4) {
2408 header[0] = BINUNICODE8;
2409 _write_size64(header + 1, size);
2410 len = 9;
2411 }
2412 else {
2413 PyErr_SetString(PyExc_OverflowError,
2414 "cannot serialize a string larger than 4GiB");
2415 Py_XDECREF(encoded);
2416 return -1;
2417 }
2418
2419 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2420 Py_XDECREF(encoded);
2421 return -1;
2422 }
2423 Py_XDECREF(encoded);
2424 return 0;
2425 }
2426
2427 static int
save_unicode(PicklerObject * self,PyObject * obj)2428 save_unicode(PicklerObject *self, PyObject *obj)
2429 {
2430 if (self->bin) {
2431 if (write_unicode_binary(self, obj) < 0)
2432 return -1;
2433 }
2434 else {
2435 PyObject *encoded;
2436 Py_ssize_t size;
2437 const char unicode_op = UNICODE;
2438
2439 encoded = raw_unicode_escape(obj);
2440 if (encoded == NULL)
2441 return -1;
2442
2443 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2444 Py_DECREF(encoded);
2445 return -1;
2446 }
2447
2448 size = PyBytes_GET_SIZE(encoded);
2449 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2450 Py_DECREF(encoded);
2451 return -1;
2452 }
2453 Py_DECREF(encoded);
2454
2455 if (_Pickler_Write(self, "\n", 1) < 0)
2456 return -1;
2457 }
2458 if (memo_put(self, obj) < 0)
2459 return -1;
2460
2461 return 0;
2462 }
2463
2464 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2465 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2466 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2467 {
2468 Py_ssize_t i;
2469
2470 assert(PyTuple_Size(t) == len);
2471
2472 for (i = 0; i < len; i++) {
2473 PyObject *element = PyTuple_GET_ITEM(t, i);
2474
2475 if (element == NULL)
2476 return -1;
2477 if (save(self, element, 0) < 0)
2478 return -1;
2479 }
2480
2481 return 0;
2482 }
2483
2484 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2485 * used across protocols to minimize the space needed to pickle them.
2486 * Tuples are also the only builtin immutable type that can be recursive
2487 * (a tuple can be reached from itself), and that requires some subtle
2488 * magic so that it works in all cases. IOW, this is a long routine.
2489 */
2490 static int
save_tuple(PicklerObject * self,PyObject * obj)2491 save_tuple(PicklerObject *self, PyObject *obj)
2492 {
2493 Py_ssize_t len, i;
2494
2495 const char mark_op = MARK;
2496 const char tuple_op = TUPLE;
2497 const char pop_op = POP;
2498 const char pop_mark_op = POP_MARK;
2499 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2500
2501 if ((len = PyTuple_Size(obj)) < 0)
2502 return -1;
2503
2504 if (len == 0) {
2505 char pdata[2];
2506
2507 if (self->proto) {
2508 pdata[0] = EMPTY_TUPLE;
2509 len = 1;
2510 }
2511 else {
2512 pdata[0] = MARK;
2513 pdata[1] = TUPLE;
2514 len = 2;
2515 }
2516 if (_Pickler_Write(self, pdata, len) < 0)
2517 return -1;
2518 return 0;
2519 }
2520
2521 /* The tuple isn't in the memo now. If it shows up there after
2522 * saving the tuple elements, the tuple must be recursive, in
2523 * which case we'll pop everything we put on the stack, and fetch
2524 * its value from the memo.
2525 */
2526 if (len <= 3 && self->proto >= 2) {
2527 /* Use TUPLE{1,2,3} opcodes. */
2528 if (store_tuple_elements(self, obj, len) < 0)
2529 return -1;
2530
2531 if (PyMemoTable_Get(self->memo, obj)) {
2532 /* pop the len elements */
2533 for (i = 0; i < len; i++)
2534 if (_Pickler_Write(self, &pop_op, 1) < 0)
2535 return -1;
2536 /* fetch from memo */
2537 if (memo_get(self, obj) < 0)
2538 return -1;
2539
2540 return 0;
2541 }
2542 else { /* Not recursive. */
2543 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2544 return -1;
2545 }
2546 goto memoize;
2547 }
2548
2549 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2550 * Generate MARK e1 e2 ... TUPLE
2551 */
2552 if (_Pickler_Write(self, &mark_op, 1) < 0)
2553 return -1;
2554
2555 if (store_tuple_elements(self, obj, len) < 0)
2556 return -1;
2557
2558 if (PyMemoTable_Get(self->memo, obj)) {
2559 /* pop the stack stuff we pushed */
2560 if (self->bin) {
2561 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2562 return -1;
2563 }
2564 else {
2565 /* Note that we pop one more than len, to remove
2566 * the MARK too.
2567 */
2568 for (i = 0; i <= len; i++)
2569 if (_Pickler_Write(self, &pop_op, 1) < 0)
2570 return -1;
2571 }
2572 /* fetch from memo */
2573 if (memo_get(self, obj) < 0)
2574 return -1;
2575
2576 return 0;
2577 }
2578 else { /* Not recursive. */
2579 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2580 return -1;
2581 }
2582
2583 memoize:
2584 if (memo_put(self, obj) < 0)
2585 return -1;
2586
2587 return 0;
2588 }
2589
2590 /* iter is an iterator giving items, and we batch up chunks of
2591 * MARK item item ... item APPENDS
2592 * opcode sequences. Calling code should have arranged to first create an
2593 * empty list, or list-like object, for the APPENDS to operate on.
2594 * Returns 0 on success, <0 on error.
2595 */
2596 static int
batch_list(PicklerObject * self,PyObject * iter)2597 batch_list(PicklerObject *self, PyObject *iter)
2598 {
2599 PyObject *obj = NULL;
2600 PyObject *firstitem = NULL;
2601 int i, n;
2602
2603 const char mark_op = MARK;
2604 const char append_op = APPEND;
2605 const char appends_op = APPENDS;
2606
2607 assert(iter != NULL);
2608
2609 /* XXX: I think this function could be made faster by avoiding the
2610 iterator interface and fetching objects directly from list using
2611 PyList_GET_ITEM.
2612 */
2613
2614 if (self->proto == 0) {
2615 /* APPENDS isn't available; do one at a time. */
2616 for (;;) {
2617 obj = PyIter_Next(iter);
2618 if (obj == NULL) {
2619 if (PyErr_Occurred())
2620 return -1;
2621 break;
2622 }
2623 i = save(self, obj, 0);
2624 Py_DECREF(obj);
2625 if (i < 0)
2626 return -1;
2627 if (_Pickler_Write(self, &append_op, 1) < 0)
2628 return -1;
2629 }
2630 return 0;
2631 }
2632
2633 /* proto > 0: write in batches of BATCHSIZE. */
2634 do {
2635 /* Get first item */
2636 firstitem = PyIter_Next(iter);
2637 if (firstitem == NULL) {
2638 if (PyErr_Occurred())
2639 goto error;
2640
2641 /* nothing more to add */
2642 break;
2643 }
2644
2645 /* Try to get a second item */
2646 obj = PyIter_Next(iter);
2647 if (obj == NULL) {
2648 if (PyErr_Occurred())
2649 goto error;
2650
2651 /* Only one item to write */
2652 if (save(self, firstitem, 0) < 0)
2653 goto error;
2654 if (_Pickler_Write(self, &append_op, 1) < 0)
2655 goto error;
2656 Py_CLEAR(firstitem);
2657 break;
2658 }
2659
2660 /* More than one item to write */
2661
2662 /* Pump out MARK, items, APPENDS. */
2663 if (_Pickler_Write(self, &mark_op, 1) < 0)
2664 goto error;
2665
2666 if (save(self, firstitem, 0) < 0)
2667 goto error;
2668 Py_CLEAR(firstitem);
2669 n = 1;
2670
2671 /* Fetch and save up to BATCHSIZE items */
2672 while (obj) {
2673 if (save(self, obj, 0) < 0)
2674 goto error;
2675 Py_CLEAR(obj);
2676 n += 1;
2677
2678 if (n == BATCHSIZE)
2679 break;
2680
2681 obj = PyIter_Next(iter);
2682 if (obj == NULL) {
2683 if (PyErr_Occurred())
2684 goto error;
2685 break;
2686 }
2687 }
2688
2689 if (_Pickler_Write(self, &appends_op, 1) < 0)
2690 goto error;
2691
2692 } while (n == BATCHSIZE);
2693 return 0;
2694
2695 error:
2696 Py_XDECREF(firstitem);
2697 Py_XDECREF(obj);
2698 return -1;
2699 }
2700
2701 /* This is a variant of batch_list() above, specialized for lists (with no
2702 * support for list subclasses). Like batch_list(), we batch up chunks of
2703 * MARK item item ... item APPENDS
2704 * opcode sequences. Calling code should have arranged to first create an
2705 * empty list, or list-like object, for the APPENDS to operate on.
2706 * Returns 0 on success, -1 on error.
2707 *
2708 * This version is considerably faster than batch_list(), if less general.
2709 *
2710 * Note that this only works for protocols > 0.
2711 */
2712 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2713 batch_list_exact(PicklerObject *self, PyObject *obj)
2714 {
2715 PyObject *item = NULL;
2716 Py_ssize_t this_batch, total;
2717
2718 const char append_op = APPEND;
2719 const char appends_op = APPENDS;
2720 const char mark_op = MARK;
2721
2722 assert(obj != NULL);
2723 assert(self->proto > 0);
2724 assert(PyList_CheckExact(obj));
2725
2726 if (PyList_GET_SIZE(obj) == 1) {
2727 item = PyList_GET_ITEM(obj, 0);
2728 if (save(self, item, 0) < 0)
2729 return -1;
2730 if (_Pickler_Write(self, &append_op, 1) < 0)
2731 return -1;
2732 return 0;
2733 }
2734
2735 /* Write in batches of BATCHSIZE. */
2736 total = 0;
2737 do {
2738 this_batch = 0;
2739 if (_Pickler_Write(self, &mark_op, 1) < 0)
2740 return -1;
2741 while (total < PyList_GET_SIZE(obj)) {
2742 item = PyList_GET_ITEM(obj, total);
2743 if (save(self, item, 0) < 0)
2744 return -1;
2745 total++;
2746 if (++this_batch == BATCHSIZE)
2747 break;
2748 }
2749 if (_Pickler_Write(self, &appends_op, 1) < 0)
2750 return -1;
2751
2752 } while (total < PyList_GET_SIZE(obj));
2753
2754 return 0;
2755 }
2756
2757 static int
save_list(PicklerObject * self,PyObject * obj)2758 save_list(PicklerObject *self, PyObject *obj)
2759 {
2760 char header[3];
2761 Py_ssize_t len;
2762 int status = 0;
2763
2764 if (self->fast && !fast_save_enter(self, obj))
2765 goto error;
2766
2767 /* Create an empty list. */
2768 if (self->bin) {
2769 header[0] = EMPTY_LIST;
2770 len = 1;
2771 }
2772 else {
2773 header[0] = MARK;
2774 header[1] = LIST;
2775 len = 2;
2776 }
2777
2778 if (_Pickler_Write(self, header, len) < 0)
2779 goto error;
2780
2781 /* Get list length, and bow out early if empty. */
2782 if ((len = PyList_Size(obj)) < 0)
2783 goto error;
2784
2785 if (memo_put(self, obj) < 0)
2786 goto error;
2787
2788 if (len != 0) {
2789 /* Materialize the list elements. */
2790 if (PyList_CheckExact(obj) && self->proto > 0) {
2791 if (Py_EnterRecursiveCall(" while pickling an object"))
2792 goto error;
2793 status = batch_list_exact(self, obj);
2794 Py_LeaveRecursiveCall();
2795 } else {
2796 PyObject *iter = PyObject_GetIter(obj);
2797 if (iter == NULL)
2798 goto error;
2799
2800 if (Py_EnterRecursiveCall(" while pickling an object")) {
2801 Py_DECREF(iter);
2802 goto error;
2803 }
2804 status = batch_list(self, iter);
2805 Py_LeaveRecursiveCall();
2806 Py_DECREF(iter);
2807 }
2808 }
2809 if (0) {
2810 error:
2811 status = -1;
2812 }
2813
2814 if (self->fast && !fast_save_leave(self, obj))
2815 status = -1;
2816
2817 return status;
2818 }
2819
2820 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2821 * MARK key value ... key value SETITEMS
2822 * opcode sequences. Calling code should have arranged to first create an
2823 * empty dict, or dict-like object, for the SETITEMS to operate on.
2824 * Returns 0 on success, <0 on error.
2825 *
2826 * This is very much like batch_list(). The difference between saving
2827 * elements directly, and picking apart two-tuples, is so long-winded at
2828 * the C level, though, that attempts to combine these routines were too
2829 * ugly to bear.
2830 */
2831 static int
batch_dict(PicklerObject * self,PyObject * iter)2832 batch_dict(PicklerObject *self, PyObject *iter)
2833 {
2834 PyObject *obj = NULL;
2835 PyObject *firstitem = NULL;
2836 int i, n;
2837
2838 const char mark_op = MARK;
2839 const char setitem_op = SETITEM;
2840 const char setitems_op = SETITEMS;
2841
2842 assert(iter != NULL);
2843
2844 if (self->proto == 0) {
2845 /* SETITEMS isn't available; do one at a time. */
2846 for (;;) {
2847 obj = PyIter_Next(iter);
2848 if (obj == NULL) {
2849 if (PyErr_Occurred())
2850 return -1;
2851 break;
2852 }
2853 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2854 PyErr_SetString(PyExc_TypeError, "dict items "
2855 "iterator must return 2-tuples");
2856 return -1;
2857 }
2858 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2859 if (i >= 0)
2860 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2861 Py_DECREF(obj);
2862 if (i < 0)
2863 return -1;
2864 if (_Pickler_Write(self, &setitem_op, 1) < 0)
2865 return -1;
2866 }
2867 return 0;
2868 }
2869
2870 /* proto > 0: write in batches of BATCHSIZE. */
2871 do {
2872 /* Get first item */
2873 firstitem = PyIter_Next(iter);
2874 if (firstitem == NULL) {
2875 if (PyErr_Occurred())
2876 goto error;
2877
2878 /* nothing more to add */
2879 break;
2880 }
2881 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2882 PyErr_SetString(PyExc_TypeError, "dict items "
2883 "iterator must return 2-tuples");
2884 goto error;
2885 }
2886
2887 /* Try to get a second item */
2888 obj = PyIter_Next(iter);
2889 if (obj == NULL) {
2890 if (PyErr_Occurred())
2891 goto error;
2892
2893 /* Only one item to write */
2894 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2895 goto error;
2896 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2897 goto error;
2898 if (_Pickler_Write(self, &setitem_op, 1) < 0)
2899 goto error;
2900 Py_CLEAR(firstitem);
2901 break;
2902 }
2903
2904 /* More than one item to write */
2905
2906 /* Pump out MARK, items, SETITEMS. */
2907 if (_Pickler_Write(self, &mark_op, 1) < 0)
2908 goto error;
2909
2910 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2911 goto error;
2912 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2913 goto error;
2914 Py_CLEAR(firstitem);
2915 n = 1;
2916
2917 /* Fetch and save up to BATCHSIZE items */
2918 while (obj) {
2919 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2920 PyErr_SetString(PyExc_TypeError, "dict items "
2921 "iterator must return 2-tuples");
2922 goto error;
2923 }
2924 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2925 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2926 goto error;
2927 Py_CLEAR(obj);
2928 n += 1;
2929
2930 if (n == BATCHSIZE)
2931 break;
2932
2933 obj = PyIter_Next(iter);
2934 if (obj == NULL) {
2935 if (PyErr_Occurred())
2936 goto error;
2937 break;
2938 }
2939 }
2940
2941 if (_Pickler_Write(self, &setitems_op, 1) < 0)
2942 goto error;
2943
2944 } while (n == BATCHSIZE);
2945 return 0;
2946
2947 error:
2948 Py_XDECREF(firstitem);
2949 Py_XDECREF(obj);
2950 return -1;
2951 }
2952
2953 /* This is a variant of batch_dict() above that specializes for dicts, with no
2954 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2955 * MARK key value ... key value SETITEMS
2956 * opcode sequences. Calling code should have arranged to first create an
2957 * empty dict, or dict-like object, for the SETITEMS to operate on.
2958 * Returns 0 on success, -1 on error.
2959 *
2960 * Note that this currently doesn't work for protocol 0.
2961 */
2962 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)2963 batch_dict_exact(PicklerObject *self, PyObject *obj)
2964 {
2965 PyObject *key = NULL, *value = NULL;
2966 int i;
2967 Py_ssize_t dict_size, ppos = 0;
2968
2969 const char mark_op = MARK;
2970 const char setitem_op = SETITEM;
2971 const char setitems_op = SETITEMS;
2972
2973 assert(obj != NULL && PyDict_CheckExact(obj));
2974 assert(self->proto > 0);
2975
2976 dict_size = PyDict_GET_SIZE(obj);
2977
2978 /* Special-case len(d) == 1 to save space. */
2979 if (dict_size == 1) {
2980 PyDict_Next(obj, &ppos, &key, &value);
2981 if (save(self, key, 0) < 0)
2982 return -1;
2983 if (save(self, value, 0) < 0)
2984 return -1;
2985 if (_Pickler_Write(self, &setitem_op, 1) < 0)
2986 return -1;
2987 return 0;
2988 }
2989
2990 /* Write in batches of BATCHSIZE. */
2991 do {
2992 i = 0;
2993 if (_Pickler_Write(self, &mark_op, 1) < 0)
2994 return -1;
2995 while (PyDict_Next(obj, &ppos, &key, &value)) {
2996 if (save(self, key, 0) < 0)
2997 return -1;
2998 if (save(self, value, 0) < 0)
2999 return -1;
3000 if (++i == BATCHSIZE)
3001 break;
3002 }
3003 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3004 return -1;
3005 if (PyDict_GET_SIZE(obj) != dict_size) {
3006 PyErr_Format(
3007 PyExc_RuntimeError,
3008 "dictionary changed size during iteration");
3009 return -1;
3010 }
3011
3012 } while (i == BATCHSIZE);
3013 return 0;
3014 }
3015
3016 static int
save_dict(PicklerObject * self,PyObject * obj)3017 save_dict(PicklerObject *self, PyObject *obj)
3018 {
3019 PyObject *items, *iter;
3020 char header[3];
3021 Py_ssize_t len;
3022 int status = 0;
3023 assert(PyDict_Check(obj));
3024
3025 if (self->fast && !fast_save_enter(self, obj))
3026 goto error;
3027
3028 /* Create an empty dict. */
3029 if (self->bin) {
3030 header[0] = EMPTY_DICT;
3031 len = 1;
3032 }
3033 else {
3034 header[0] = MARK;
3035 header[1] = DICT;
3036 len = 2;
3037 }
3038
3039 if (_Pickler_Write(self, header, len) < 0)
3040 goto error;
3041
3042 if (memo_put(self, obj) < 0)
3043 goto error;
3044
3045 if (PyDict_GET_SIZE(obj)) {
3046 /* Save the dict items. */
3047 if (PyDict_CheckExact(obj) && self->proto > 0) {
3048 /* We can take certain shortcuts if we know this is a dict and
3049 not a dict subclass. */
3050 if (Py_EnterRecursiveCall(" while pickling an object"))
3051 goto error;
3052 status = batch_dict_exact(self, obj);
3053 Py_LeaveRecursiveCall();
3054 } else {
3055 _Py_IDENTIFIER(items);
3056
3057 items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3058 if (items == NULL)
3059 goto error;
3060 iter = PyObject_GetIter(items);
3061 Py_DECREF(items);
3062 if (iter == NULL)
3063 goto error;
3064 if (Py_EnterRecursiveCall(" while pickling an object")) {
3065 Py_DECREF(iter);
3066 goto error;
3067 }
3068 status = batch_dict(self, iter);
3069 Py_LeaveRecursiveCall();
3070 Py_DECREF(iter);
3071 }
3072 }
3073
3074 if (0) {
3075 error:
3076 status = -1;
3077 }
3078
3079 if (self->fast && !fast_save_leave(self, obj))
3080 status = -1;
3081
3082 return status;
3083 }
3084
3085 static int
save_set(PicklerObject * self,PyObject * obj)3086 save_set(PicklerObject *self, PyObject *obj)
3087 {
3088 PyObject *item;
3089 int i;
3090 Py_ssize_t set_size, ppos = 0;
3091 Py_hash_t hash;
3092
3093 const char empty_set_op = EMPTY_SET;
3094 const char mark_op = MARK;
3095 const char additems_op = ADDITEMS;
3096
3097 if (self->proto < 4) {
3098 PyObject *items;
3099 PyObject *reduce_value;
3100 int status;
3101
3102 items = PySequence_List(obj);
3103 if (items == NULL) {
3104 return -1;
3105 }
3106 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3107 Py_DECREF(items);
3108 if (reduce_value == NULL) {
3109 return -1;
3110 }
3111 /* save_reduce() will memoize the object automatically. */
3112 status = save_reduce(self, reduce_value, obj);
3113 Py_DECREF(reduce_value);
3114 return status;
3115 }
3116
3117 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3118 return -1;
3119
3120 if (memo_put(self, obj) < 0)
3121 return -1;
3122
3123 set_size = PySet_GET_SIZE(obj);
3124 if (set_size == 0)
3125 return 0; /* nothing to do */
3126
3127 /* Write in batches of BATCHSIZE. */
3128 do {
3129 i = 0;
3130 if (_Pickler_Write(self, &mark_op, 1) < 0)
3131 return -1;
3132 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3133 if (save(self, item, 0) < 0)
3134 return -1;
3135 if (++i == BATCHSIZE)
3136 break;
3137 }
3138 if (_Pickler_Write(self, &additems_op, 1) < 0)
3139 return -1;
3140 if (PySet_GET_SIZE(obj) != set_size) {
3141 PyErr_Format(
3142 PyExc_RuntimeError,
3143 "set changed size during iteration");
3144 return -1;
3145 }
3146 } while (i == BATCHSIZE);
3147
3148 return 0;
3149 }
3150
3151 static int
save_frozenset(PicklerObject * self,PyObject * obj)3152 save_frozenset(PicklerObject *self, PyObject *obj)
3153 {
3154 PyObject *iter;
3155
3156 const char mark_op = MARK;
3157 const char frozenset_op = FROZENSET;
3158
3159 if (self->fast && !fast_save_enter(self, obj))
3160 return -1;
3161
3162 if (self->proto < 4) {
3163 PyObject *items;
3164 PyObject *reduce_value;
3165 int status;
3166
3167 items = PySequence_List(obj);
3168 if (items == NULL) {
3169 return -1;
3170 }
3171 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3172 items);
3173 Py_DECREF(items);
3174 if (reduce_value == NULL) {
3175 return -1;
3176 }
3177 /* save_reduce() will memoize the object automatically. */
3178 status = save_reduce(self, reduce_value, obj);
3179 Py_DECREF(reduce_value);
3180 return status;
3181 }
3182
3183 if (_Pickler_Write(self, &mark_op, 1) < 0)
3184 return -1;
3185
3186 iter = PyObject_GetIter(obj);
3187 if (iter == NULL) {
3188 return -1;
3189 }
3190 for (;;) {
3191 PyObject *item;
3192
3193 item = PyIter_Next(iter);
3194 if (item == NULL) {
3195 if (PyErr_Occurred()) {
3196 Py_DECREF(iter);
3197 return -1;
3198 }
3199 break;
3200 }
3201 if (save(self, item, 0) < 0) {
3202 Py_DECREF(item);
3203 Py_DECREF(iter);
3204 return -1;
3205 }
3206 Py_DECREF(item);
3207 }
3208 Py_DECREF(iter);
3209
3210 /* If the object is already in the memo, this means it is
3211 recursive. In this case, throw away everything we put on the
3212 stack, and fetch the object back from the memo. */
3213 if (PyMemoTable_Get(self->memo, obj)) {
3214 const char pop_mark_op = POP_MARK;
3215
3216 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3217 return -1;
3218 if (memo_get(self, obj) < 0)
3219 return -1;
3220 return 0;
3221 }
3222
3223 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3224 return -1;
3225 if (memo_put(self, obj) < 0)
3226 return -1;
3227
3228 return 0;
3229 }
3230
3231 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3232 fix_imports(PyObject **module_name, PyObject **global_name)
3233 {
3234 PyObject *key;
3235 PyObject *item;
3236 PickleState *st = _Pickle_GetGlobalState();
3237
3238 key = PyTuple_Pack(2, *module_name, *global_name);
3239 if (key == NULL)
3240 return -1;
3241 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3242 Py_DECREF(key);
3243 if (item) {
3244 PyObject *fixed_module_name;
3245 PyObject *fixed_global_name;
3246
3247 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3248 PyErr_Format(PyExc_RuntimeError,
3249 "_compat_pickle.REVERSE_NAME_MAPPING values "
3250 "should be 2-tuples, not %.200s",
3251 Py_TYPE(item)->tp_name);
3252 return -1;
3253 }
3254 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3255 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3256 if (!PyUnicode_Check(fixed_module_name) ||
3257 !PyUnicode_Check(fixed_global_name)) {
3258 PyErr_Format(PyExc_RuntimeError,
3259 "_compat_pickle.REVERSE_NAME_MAPPING values "
3260 "should be pairs of str, not (%.200s, %.200s)",
3261 Py_TYPE(fixed_module_name)->tp_name,
3262 Py_TYPE(fixed_global_name)->tp_name);
3263 return -1;
3264 }
3265
3266 Py_CLEAR(*module_name);
3267 Py_CLEAR(*global_name);
3268 Py_INCREF(fixed_module_name);
3269 Py_INCREF(fixed_global_name);
3270 *module_name = fixed_module_name;
3271 *global_name = fixed_global_name;
3272 return 0;
3273 }
3274 else if (PyErr_Occurred()) {
3275 return -1;
3276 }
3277
3278 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3279 if (item) {
3280 if (!PyUnicode_Check(item)) {
3281 PyErr_Format(PyExc_RuntimeError,
3282 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3283 "should be strings, not %.200s",
3284 Py_TYPE(item)->tp_name);
3285 return -1;
3286 }
3287 Py_INCREF(item);
3288 Py_XSETREF(*module_name, item);
3289 }
3290 else if (PyErr_Occurred()) {
3291 return -1;
3292 }
3293
3294 return 0;
3295 }
3296
3297 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3298 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3299 {
3300 PyObject *global_name = NULL;
3301 PyObject *module_name = NULL;
3302 PyObject *module = NULL;
3303 PyObject *parent = NULL;
3304 PyObject *dotted_path = NULL;
3305 PyObject *lastname = NULL;
3306 PyObject *cls;
3307 PickleState *st = _Pickle_GetGlobalState();
3308 int status = 0;
3309 _Py_IDENTIFIER(__name__);
3310 _Py_IDENTIFIER(__qualname__);
3311
3312 const char global_op = GLOBAL;
3313
3314 if (name) {
3315 Py_INCREF(name);
3316 global_name = name;
3317 }
3318 else {
3319 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3320 goto error;
3321 if (global_name == NULL) {
3322 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3323 if (global_name == NULL)
3324 goto error;
3325 }
3326 }
3327
3328 dotted_path = get_dotted_path(module, global_name);
3329 if (dotted_path == NULL)
3330 goto error;
3331 module_name = whichmodule(obj, dotted_path);
3332 if (module_name == NULL)
3333 goto error;
3334
3335 /* XXX: Change to use the import C API directly with level=0 to disallow
3336 relative imports.
3337
3338 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3339 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3340 custom import functions (IMHO, this would be a nice security
3341 feature). The import C API would need to be extended to support the
3342 extra parameters of __import__ to fix that. */
3343 module = PyImport_Import(module_name);
3344 if (module == NULL) {
3345 PyErr_Format(st->PicklingError,
3346 "Can't pickle %R: import of module %R failed",
3347 obj, module_name);
3348 goto error;
3349 }
3350 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3351 Py_INCREF(lastname);
3352 cls = get_deep_attribute(module, dotted_path, &parent);
3353 Py_CLEAR(dotted_path);
3354 if (cls == NULL) {
3355 PyErr_Format(st->PicklingError,
3356 "Can't pickle %R: attribute lookup %S on %S failed",
3357 obj, global_name, module_name);
3358 goto error;
3359 }
3360 if (cls != obj) {
3361 Py_DECREF(cls);
3362 PyErr_Format(st->PicklingError,
3363 "Can't pickle %R: it's not the same object as %S.%S",
3364 obj, module_name, global_name);
3365 goto error;
3366 }
3367 Py_DECREF(cls);
3368
3369 if (self->proto >= 2) {
3370 /* See whether this is in the extension registry, and if
3371 * so generate an EXT opcode.
3372 */
3373 PyObject *extension_key;
3374 PyObject *code_obj; /* extension code as Python object */
3375 long code; /* extension code as C value */
3376 char pdata[5];
3377 Py_ssize_t n;
3378
3379 extension_key = PyTuple_Pack(2, module_name, global_name);
3380 if (extension_key == NULL) {
3381 goto error;
3382 }
3383 code_obj = PyDict_GetItemWithError(st->extension_registry,
3384 extension_key);
3385 Py_DECREF(extension_key);
3386 /* The object is not registered in the extension registry.
3387 This is the most likely code path. */
3388 if (code_obj == NULL) {
3389 if (PyErr_Occurred()) {
3390 goto error;
3391 }
3392 goto gen_global;
3393 }
3394
3395 /* XXX: pickle.py doesn't check neither the type, nor the range
3396 of the value returned by the extension_registry. It should for
3397 consistency. */
3398
3399 /* Verify code_obj has the right type and value. */
3400 if (!PyLong_Check(code_obj)) {
3401 PyErr_Format(st->PicklingError,
3402 "Can't pickle %R: extension code %R isn't an integer",
3403 obj, code_obj);
3404 goto error;
3405 }
3406 code = PyLong_AS_LONG(code_obj);
3407 if (code <= 0 || code > 0x7fffffffL) {
3408 if (!PyErr_Occurred())
3409 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3410 "code %ld is out of range", obj, code);
3411 goto error;
3412 }
3413
3414 /* Generate an EXT opcode. */
3415 if (code <= 0xff) {
3416 pdata[0] = EXT1;
3417 pdata[1] = (unsigned char)code;
3418 n = 2;
3419 }
3420 else if (code <= 0xffff) {
3421 pdata[0] = EXT2;
3422 pdata[1] = (unsigned char)(code & 0xff);
3423 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3424 n = 3;
3425 }
3426 else {
3427 pdata[0] = EXT4;
3428 pdata[1] = (unsigned char)(code & 0xff);
3429 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3430 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3431 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3432 n = 5;
3433 }
3434
3435 if (_Pickler_Write(self, pdata, n) < 0)
3436 goto error;
3437 }
3438 else {
3439 gen_global:
3440 if (parent == module) {
3441 Py_INCREF(lastname);
3442 Py_DECREF(global_name);
3443 global_name = lastname;
3444 }
3445 if (self->proto >= 4) {
3446 const char stack_global_op = STACK_GLOBAL;
3447
3448 if (save(self, module_name, 0) < 0)
3449 goto error;
3450 if (save(self, global_name, 0) < 0)
3451 goto error;
3452
3453 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3454 goto error;
3455 }
3456 else if (parent != module) {
3457 PickleState *st = _Pickle_GetGlobalState();
3458 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3459 st->getattr, parent, lastname);
3460 if (reduce_value == NULL)
3461 goto error;
3462 status = save_reduce(self, reduce_value, NULL);
3463 Py_DECREF(reduce_value);
3464 if (status < 0)
3465 goto error;
3466 }
3467 else {
3468 /* Generate a normal global opcode if we are using a pickle
3469 protocol < 4, or if the object is not registered in the
3470 extension registry. */
3471 PyObject *encoded;
3472 PyObject *(*unicode_encoder)(PyObject *);
3473
3474 if (_Pickler_Write(self, &global_op, 1) < 0)
3475 goto error;
3476
3477 /* For protocol < 3 and if the user didn't request against doing
3478 so, we convert module names to the old 2.x module names. */
3479 if (self->proto < 3 && self->fix_imports) {
3480 if (fix_imports(&module_name, &global_name) < 0) {
3481 goto error;
3482 }
3483 }
3484
3485 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3486 both the module name and the global name using UTF-8. We do so
3487 only when we are using the pickle protocol newer than version
3488 3. This is to ensure compatibility with older Unpickler running
3489 on Python 2.x. */
3490 if (self->proto == 3) {
3491 unicode_encoder = PyUnicode_AsUTF8String;
3492 }
3493 else {
3494 unicode_encoder = PyUnicode_AsASCIIString;
3495 }
3496 encoded = unicode_encoder(module_name);
3497 if (encoded == NULL) {
3498 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3499 PyErr_Format(st->PicklingError,
3500 "can't pickle module identifier '%S' using "
3501 "pickle protocol %i",
3502 module_name, self->proto);
3503 goto error;
3504 }
3505 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3506 PyBytes_GET_SIZE(encoded)) < 0) {
3507 Py_DECREF(encoded);
3508 goto error;
3509 }
3510 Py_DECREF(encoded);
3511 if(_Pickler_Write(self, "\n", 1) < 0)
3512 goto error;
3513
3514 /* Save the name of the module. */
3515 encoded = unicode_encoder(global_name);
3516 if (encoded == NULL) {
3517 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3518 PyErr_Format(st->PicklingError,
3519 "can't pickle global identifier '%S' using "
3520 "pickle protocol %i",
3521 global_name, self->proto);
3522 goto error;
3523 }
3524 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3525 PyBytes_GET_SIZE(encoded)) < 0) {
3526 Py_DECREF(encoded);
3527 goto error;
3528 }
3529 Py_DECREF(encoded);
3530 if (_Pickler_Write(self, "\n", 1) < 0)
3531 goto error;
3532 }
3533 /* Memoize the object. */
3534 if (memo_put(self, obj) < 0)
3535 goto error;
3536 }
3537
3538 if (0) {
3539 error:
3540 status = -1;
3541 }
3542 Py_XDECREF(module_name);
3543 Py_XDECREF(global_name);
3544 Py_XDECREF(module);
3545 Py_XDECREF(parent);
3546 Py_XDECREF(dotted_path);
3547 Py_XDECREF(lastname);
3548
3549 return status;
3550 }
3551
3552 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3553 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3554 {
3555 PyObject *reduce_value;
3556 int status;
3557
3558 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3559 if (reduce_value == NULL) {
3560 return -1;
3561 }
3562 status = save_reduce(self, reduce_value, obj);
3563 Py_DECREF(reduce_value);
3564 return status;
3565 }
3566
3567 static int
save_type(PicklerObject * self,PyObject * obj)3568 save_type(PicklerObject *self, PyObject *obj)
3569 {
3570 if (obj == (PyObject *)&_PyNone_Type) {
3571 return save_singleton_type(self, obj, Py_None);
3572 }
3573 else if (obj == (PyObject *)&PyEllipsis_Type) {
3574 return save_singleton_type(self, obj, Py_Ellipsis);
3575 }
3576 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3577 return save_singleton_type(self, obj, Py_NotImplemented);
3578 }
3579 return save_global(self, obj, NULL);
3580 }
3581
3582 static int
save_pers(PicklerObject * self,PyObject * obj)3583 save_pers(PicklerObject *self, PyObject *obj)
3584 {
3585 PyObject *pid = NULL;
3586 int status = 0;
3587
3588 const char persid_op = PERSID;
3589 const char binpersid_op = BINPERSID;
3590
3591 pid = call_method(self->pers_func, self->pers_func_self, obj);
3592 if (pid == NULL)
3593 return -1;
3594
3595 if (pid != Py_None) {
3596 if (self->bin) {
3597 if (save(self, pid, 1) < 0 ||
3598 _Pickler_Write(self, &binpersid_op, 1) < 0)
3599 goto error;
3600 }
3601 else {
3602 PyObject *pid_str;
3603
3604 pid_str = PyObject_Str(pid);
3605 if (pid_str == NULL)
3606 goto error;
3607
3608 /* XXX: Should it check whether the pid contains embedded
3609 newlines? */
3610 if (!PyUnicode_IS_ASCII(pid_str)) {
3611 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3612 "persistent IDs in protocol 0 must be "
3613 "ASCII strings");
3614 Py_DECREF(pid_str);
3615 goto error;
3616 }
3617
3618 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3619 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3620 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3621 _Pickler_Write(self, "\n", 1) < 0) {
3622 Py_DECREF(pid_str);
3623 goto error;
3624 }
3625 Py_DECREF(pid_str);
3626 }
3627 status = 1;
3628 }
3629
3630 if (0) {
3631 error:
3632 status = -1;
3633 }
3634 Py_XDECREF(pid);
3635
3636 return status;
3637 }
3638
3639 static PyObject *
get_class(PyObject * obj)3640 get_class(PyObject *obj)
3641 {
3642 PyObject *cls;
3643 _Py_IDENTIFIER(__class__);
3644
3645 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3646 cls = (PyObject *) Py_TYPE(obj);
3647 Py_INCREF(cls);
3648 }
3649 return cls;
3650 }
3651
3652 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3653 * appropriate __reduce__ method for obj.
3654 */
3655 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3656 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3657 {
3658 PyObject *callable;
3659 PyObject *argtup;
3660 PyObject *state = NULL;
3661 PyObject *listitems = Py_None;
3662 PyObject *dictitems = Py_None;
3663 PickleState *st = _Pickle_GetGlobalState();
3664 Py_ssize_t size;
3665 int use_newobj = 0, use_newobj_ex = 0;
3666
3667 const char reduce_op = REDUCE;
3668 const char build_op = BUILD;
3669 const char newobj_op = NEWOBJ;
3670 const char newobj_ex_op = NEWOBJ_EX;
3671
3672 size = PyTuple_Size(args);
3673 if (size < 2 || size > 5) {
3674 PyErr_SetString(st->PicklingError, "tuple returned by "
3675 "__reduce__ must contain 2 through 5 elements");
3676 return -1;
3677 }
3678
3679 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
3680 &callable, &argtup, &state, &listitems, &dictitems))
3681 return -1;
3682
3683 if (!PyCallable_Check(callable)) {
3684 PyErr_SetString(st->PicklingError, "first item of the tuple "
3685 "returned by __reduce__ must be callable");
3686 return -1;
3687 }
3688 if (!PyTuple_Check(argtup)) {
3689 PyErr_SetString(st->PicklingError, "second item of the tuple "
3690 "returned by __reduce__ must be a tuple");
3691 return -1;
3692 }
3693
3694 if (state == Py_None)
3695 state = NULL;
3696
3697 if (listitems == Py_None)
3698 listitems = NULL;
3699 else if (!PyIter_Check(listitems)) {
3700 PyErr_Format(st->PicklingError, "fourth element of the tuple "
3701 "returned by __reduce__ must be an iterator, not %s",
3702 Py_TYPE(listitems)->tp_name);
3703 return -1;
3704 }
3705
3706 if (dictitems == Py_None)
3707 dictitems = NULL;
3708 else if (!PyIter_Check(dictitems)) {
3709 PyErr_Format(st->PicklingError, "fifth element of the tuple "
3710 "returned by __reduce__ must be an iterator, not %s",
3711 Py_TYPE(dictitems)->tp_name);
3712 return -1;
3713 }
3714
3715 if (self->proto >= 2) {
3716 PyObject *name;
3717 _Py_IDENTIFIER(__name__);
3718
3719 if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
3720 return -1;
3721 }
3722 if (name != NULL && PyUnicode_Check(name)) {
3723 _Py_IDENTIFIER(__newobj_ex__);
3724 use_newobj_ex = _PyUnicode_EqualToASCIIId(
3725 name, &PyId___newobj_ex__);
3726 if (!use_newobj_ex) {
3727 _Py_IDENTIFIER(__newobj__);
3728 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
3729 }
3730 }
3731 Py_XDECREF(name);
3732 }
3733
3734 if (use_newobj_ex) {
3735 PyObject *cls;
3736 PyObject *args;
3737 PyObject *kwargs;
3738
3739 if (PyTuple_GET_SIZE(argtup) != 3) {
3740 PyErr_Format(st->PicklingError,
3741 "length of the NEWOBJ_EX argument tuple must be "
3742 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
3743 return -1;
3744 }
3745
3746 cls = PyTuple_GET_ITEM(argtup, 0);
3747 if (!PyType_Check(cls)) {
3748 PyErr_Format(st->PicklingError,
3749 "first item from NEWOBJ_EX argument tuple must "
3750 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
3751 return -1;
3752 }
3753 args = PyTuple_GET_ITEM(argtup, 1);
3754 if (!PyTuple_Check(args)) {
3755 PyErr_Format(st->PicklingError,
3756 "second item from NEWOBJ_EX argument tuple must "
3757 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
3758 return -1;
3759 }
3760 kwargs = PyTuple_GET_ITEM(argtup, 2);
3761 if (!PyDict_Check(kwargs)) {
3762 PyErr_Format(st->PicklingError,
3763 "third item from NEWOBJ_EX argument tuple must "
3764 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
3765 return -1;
3766 }
3767
3768 if (self->proto >= 4) {
3769 if (save(self, cls, 0) < 0 ||
3770 save(self, args, 0) < 0 ||
3771 save(self, kwargs, 0) < 0 ||
3772 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
3773 return -1;
3774 }
3775 }
3776 else {
3777 PyObject *newargs;
3778 PyObject *cls_new;
3779 Py_ssize_t i;
3780 _Py_IDENTIFIER(__new__);
3781
3782 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
3783 if (newargs == NULL)
3784 return -1;
3785
3786 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
3787 if (cls_new == NULL) {
3788 Py_DECREF(newargs);
3789 return -1;
3790 }
3791 PyTuple_SET_ITEM(newargs, 0, cls_new);
3792 Py_INCREF(cls);
3793 PyTuple_SET_ITEM(newargs, 1, cls);
3794 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
3795 PyObject *item = PyTuple_GET_ITEM(args, i);
3796 Py_INCREF(item);
3797 PyTuple_SET_ITEM(newargs, i + 2, item);
3798 }
3799
3800 callable = PyObject_Call(st->partial, newargs, kwargs);
3801 Py_DECREF(newargs);
3802 if (callable == NULL)
3803 return -1;
3804
3805 newargs = PyTuple_New(0);
3806 if (newargs == NULL) {
3807 Py_DECREF(callable);
3808 return -1;
3809 }
3810
3811 if (save(self, callable, 0) < 0 ||
3812 save(self, newargs, 0) < 0 ||
3813 _Pickler_Write(self, &reduce_op, 1) < 0) {
3814 Py_DECREF(newargs);
3815 Py_DECREF(callable);
3816 return -1;
3817 }
3818 Py_DECREF(newargs);
3819 Py_DECREF(callable);
3820 }
3821 }
3822 else if (use_newobj) {
3823 PyObject *cls;
3824 PyObject *newargtup;
3825 PyObject *obj_class;
3826 int p;
3827
3828 /* Sanity checks. */
3829 if (PyTuple_GET_SIZE(argtup) < 1) {
3830 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
3831 return -1;
3832 }
3833
3834 cls = PyTuple_GET_ITEM(argtup, 0);
3835 if (!PyType_Check(cls)) {
3836 PyErr_SetString(st->PicklingError, "args[0] from "
3837 "__newobj__ args is not a type");
3838 return -1;
3839 }
3840
3841 if (obj != NULL) {
3842 obj_class = get_class(obj);
3843 if (obj_class == NULL) {
3844 return -1;
3845 }
3846 p = obj_class != cls;
3847 Py_DECREF(obj_class);
3848 if (p) {
3849 PyErr_SetString(st->PicklingError, "args[0] from "
3850 "__newobj__ args has the wrong class");
3851 return -1;
3852 }
3853 }
3854 /* XXX: These calls save() are prone to infinite recursion. Imagine
3855 what happen if the value returned by the __reduce__() method of
3856 some extension type contains another object of the same type. Ouch!
3857
3858 Here is a quick example, that I ran into, to illustrate what I
3859 mean:
3860
3861 >>> import pickle, copyreg
3862 >>> copyreg.dispatch_table.pop(complex)
3863 >>> pickle.dumps(1+2j)
3864 Traceback (most recent call last):
3865 ...
3866 RecursionError: maximum recursion depth exceeded
3867
3868 Removing the complex class from copyreg.dispatch_table made the
3869 __reduce_ex__() method emit another complex object:
3870
3871 >>> (1+1j).__reduce_ex__(2)
3872 (<function __newobj__ at 0xb7b71c3c>,
3873 (<class 'complex'>, (1+1j)), None, None, None)
3874
3875 Thus when save() was called on newargstup (the 2nd item) recursion
3876 ensued. Of course, the bug was in the complex class which had a
3877 broken __getnewargs__() that emitted another complex object. But,
3878 the point, here, is it is quite easy to end up with a broken reduce
3879 function. */
3880
3881 /* Save the class and its __new__ arguments. */
3882 if (save(self, cls, 0) < 0)
3883 return -1;
3884
3885 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
3886 if (newargtup == NULL)
3887 return -1;
3888
3889 p = save(self, newargtup, 0);
3890 Py_DECREF(newargtup);
3891 if (p < 0)
3892 return -1;
3893
3894 /* Add NEWOBJ opcode. */
3895 if (_Pickler_Write(self, &newobj_op, 1) < 0)
3896 return -1;
3897 }
3898 else { /* Not using NEWOBJ. */
3899 if (save(self, callable, 0) < 0 ||
3900 save(self, argtup, 0) < 0 ||
3901 _Pickler_Write(self, &reduce_op, 1) < 0)
3902 return -1;
3903 }
3904
3905 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3906 the caller do not want to memoize the object. Not particularly useful,
3907 but that is to mimic the behavior save_reduce() in pickle.py when
3908 obj is None. */
3909 if (obj != NULL) {
3910 /* If the object is already in the memo, this means it is
3911 recursive. In this case, throw away everything we put on the
3912 stack, and fetch the object back from the memo. */
3913 if (PyMemoTable_Get(self->memo, obj)) {
3914 const char pop_op = POP;
3915
3916 if (_Pickler_Write(self, &pop_op, 1) < 0)
3917 return -1;
3918 if (memo_get(self, obj) < 0)
3919 return -1;
3920
3921 return 0;
3922 }
3923 else if (memo_put(self, obj) < 0)
3924 return -1;
3925 }
3926
3927 if (listitems && batch_list(self, listitems) < 0)
3928 return -1;
3929
3930 if (dictitems && batch_dict(self, dictitems) < 0)
3931 return -1;
3932
3933 if (state) {
3934 if (save(self, state, 0) < 0 ||
3935 _Pickler_Write(self, &build_op, 1) < 0)
3936 return -1;
3937 }
3938
3939 return 0;
3940 }
3941
3942 static int
save(PicklerObject * self,PyObject * obj,int pers_save)3943 save(PicklerObject *self, PyObject *obj, int pers_save)
3944 {
3945 PyTypeObject *type;
3946 PyObject *reduce_func = NULL;
3947 PyObject *reduce_value = NULL;
3948 int status = 0;
3949
3950 if (_Pickler_OpcodeBoundary(self) < 0)
3951 return -1;
3952
3953 if (Py_EnterRecursiveCall(" while pickling an object"))
3954 return -1;
3955
3956 /* The extra pers_save argument is necessary to avoid calling save_pers()
3957 on its returned object. */
3958 if (!pers_save && self->pers_func) {
3959 /* save_pers() returns:
3960 -1 to signal an error;
3961 0 if it did nothing successfully;
3962 1 if a persistent id was saved.
3963 */
3964 if ((status = save_pers(self, obj)) != 0)
3965 goto done;
3966 }
3967
3968 type = Py_TYPE(obj);
3969
3970 /* The old cPickle had an optimization that used switch-case statement
3971 dispatching on the first letter of the type name. This has was removed
3972 since benchmarks shown that this optimization was actually slowing
3973 things down. */
3974
3975 /* Atom types; these aren't memoized, so don't check the memo. */
3976
3977 if (obj == Py_None) {
3978 status = save_none(self, obj);
3979 goto done;
3980 }
3981 else if (obj == Py_False || obj == Py_True) {
3982 status = save_bool(self, obj);
3983 goto done;
3984 }
3985 else if (type == &PyLong_Type) {
3986 status = save_long(self, obj);
3987 goto done;
3988 }
3989 else if (type == &PyFloat_Type) {
3990 status = save_float(self, obj);
3991 goto done;
3992 }
3993
3994 /* Check the memo to see if it has the object. If so, generate
3995 a GET (or BINGET) opcode, instead of pickling the object
3996 once again. */
3997 if (PyMemoTable_Get(self->memo, obj)) {
3998 if (memo_get(self, obj) < 0)
3999 goto error;
4000 goto done;
4001 }
4002
4003 if (type == &PyBytes_Type) {
4004 status = save_bytes(self, obj);
4005 goto done;
4006 }
4007 else if (type == &PyUnicode_Type) {
4008 status = save_unicode(self, obj);
4009 goto done;
4010 }
4011 else if (type == &PyDict_Type) {
4012 status = save_dict(self, obj);
4013 goto done;
4014 }
4015 else if (type == &PySet_Type) {
4016 status = save_set(self, obj);
4017 goto done;
4018 }
4019 else if (type == &PyFrozenSet_Type) {
4020 status = save_frozenset(self, obj);
4021 goto done;
4022 }
4023 else if (type == &PyList_Type) {
4024 status = save_list(self, obj);
4025 goto done;
4026 }
4027 else if (type == &PyTuple_Type) {
4028 status = save_tuple(self, obj);
4029 goto done;
4030 }
4031 else if (type == &PyType_Type) {
4032 status = save_type(self, obj);
4033 goto done;
4034 }
4035 else if (type == &PyFunction_Type) {
4036 status = save_global(self, obj, NULL);
4037 goto done;
4038 }
4039
4040 /* XXX: This part needs some unit tests. */
4041
4042 /* Get a reduction callable, and call it. This may come from
4043 * self.dispatch_table, copyreg.dispatch_table, the object's
4044 * __reduce_ex__ method, or the object's __reduce__ method.
4045 */
4046 if (self->dispatch_table == NULL) {
4047 PickleState *st = _Pickle_GetGlobalState();
4048 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4049 (PyObject *)type);
4050 if (reduce_func == NULL) {
4051 if (PyErr_Occurred()) {
4052 goto error;
4053 }
4054 } else {
4055 /* PyDict_GetItemWithError() returns a borrowed reference.
4056 Increase the reference count to be consistent with
4057 PyObject_GetItem and _PyObject_GetAttrId used below. */
4058 Py_INCREF(reduce_func);
4059 }
4060 } else {
4061 reduce_func = PyObject_GetItem(self->dispatch_table,
4062 (PyObject *)type);
4063 if (reduce_func == NULL) {
4064 if (PyErr_ExceptionMatches(PyExc_KeyError))
4065 PyErr_Clear();
4066 else
4067 goto error;
4068 }
4069 }
4070 if (reduce_func != NULL) {
4071 Py_INCREF(obj);
4072 reduce_value = _Pickle_FastCall(reduce_func, obj);
4073 }
4074 else if (PyType_IsSubtype(type, &PyType_Type)) {
4075 status = save_global(self, obj, NULL);
4076 goto done;
4077 }
4078 else {
4079 _Py_IDENTIFIER(__reduce__);
4080 _Py_IDENTIFIER(__reduce_ex__);
4081
4082
4083 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4084 automatically defined as __reduce__. While this is convenient, this
4085 make it impossible to know which method was actually called. Of
4086 course, this is not a big deal. But still, it would be nice to let
4087 the user know which method was called when something go
4088 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4089 don't actually have to check for a __reduce__ method. */
4090
4091 /* Check for a __reduce_ex__ method. */
4092 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4093 goto error;
4094 }
4095 if (reduce_func != NULL) {
4096 PyObject *proto;
4097 proto = PyLong_FromLong(self->proto);
4098 if (proto != NULL) {
4099 reduce_value = _Pickle_FastCall(reduce_func, proto);
4100 }
4101 }
4102 else {
4103 PickleState *st = _Pickle_GetGlobalState();
4104
4105 /* Check for a __reduce__ method. */
4106 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
4107 if (reduce_func != NULL) {
4108 reduce_value = _PyObject_CallNoArg(reduce_func);
4109 }
4110 else {
4111 PyErr_Format(st->PicklingError,
4112 "can't pickle '%.200s' object: %R",
4113 type->tp_name, obj);
4114 goto error;
4115 }
4116 }
4117 }
4118
4119 if (reduce_value == NULL)
4120 goto error;
4121
4122 if (PyUnicode_Check(reduce_value)) {
4123 status = save_global(self, obj, reduce_value);
4124 goto done;
4125 }
4126
4127 if (!PyTuple_Check(reduce_value)) {
4128 PickleState *st = _Pickle_GetGlobalState();
4129 PyErr_SetString(st->PicklingError,
4130 "__reduce__ must return a string or tuple");
4131 goto error;
4132 }
4133
4134 status = save_reduce(self, reduce_value, obj);
4135
4136 if (0) {
4137 error:
4138 status = -1;
4139 }
4140 done:
4141
4142 Py_LeaveRecursiveCall();
4143 Py_XDECREF(reduce_func);
4144 Py_XDECREF(reduce_value);
4145
4146 return status;
4147 }
4148
4149 static int
dump(PicklerObject * self,PyObject * obj)4150 dump(PicklerObject *self, PyObject *obj)
4151 {
4152 const char stop_op = STOP;
4153
4154 if (self->proto >= 2) {
4155 char header[2];
4156
4157 header[0] = PROTO;
4158 assert(self->proto >= 0 && self->proto < 256);
4159 header[1] = (unsigned char)self->proto;
4160 if (_Pickler_Write(self, header, 2) < 0)
4161 return -1;
4162 if (self->proto >= 4)
4163 self->framing = 1;
4164 }
4165
4166 if (save(self, obj, 0) < 0 ||
4167 _Pickler_Write(self, &stop_op, 1) < 0 ||
4168 _Pickler_CommitFrame(self) < 0)
4169 return -1;
4170 self->framing = 0;
4171 return 0;
4172 }
4173
4174 /*[clinic input]
4175
4176 _pickle.Pickler.clear_memo
4177
4178 Clears the pickler's "memo".
4179
4180 The memo is the data structure that remembers which objects the
4181 pickler has already seen, so that shared or recursive objects are
4182 pickled by reference and not by value. This method is useful when
4183 re-using picklers.
4184 [clinic start generated code]*/
4185
4186 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4187 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4188 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4189 {
4190 if (self->memo)
4191 PyMemoTable_Clear(self->memo);
4192
4193 Py_RETURN_NONE;
4194 }
4195
4196 /*[clinic input]
4197
4198 _pickle.Pickler.dump
4199
4200 obj: object
4201 /
4202
4203 Write a pickled representation of the given object to the open file.
4204 [clinic start generated code]*/
4205
4206 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4207 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4208 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4209 {
4210 /* Check whether the Pickler was initialized correctly (issue3664).
4211 Developers often forget to call __init__() in their subclasses, which
4212 would trigger a segfault without this check. */
4213 if (self->write == NULL) {
4214 PickleState *st = _Pickle_GetGlobalState();
4215 PyErr_Format(st->PicklingError,
4216 "Pickler.__init__() was not called by %s.__init__()",
4217 Py_TYPE(self)->tp_name);
4218 return NULL;
4219 }
4220
4221 if (_Pickler_ClearBuffer(self) < 0)
4222 return NULL;
4223
4224 if (dump(self, obj) < 0)
4225 return NULL;
4226
4227 if (_Pickler_FlushToFile(self) < 0)
4228 return NULL;
4229
4230 Py_RETURN_NONE;
4231 }
4232
4233 /*[clinic input]
4234
4235 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4236
4237 Returns size in memory, in bytes.
4238 [clinic start generated code]*/
4239
4240 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4241 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4242 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4243 {
4244 Py_ssize_t res, s;
4245
4246 res = _PyObject_SIZE(Py_TYPE(self));
4247 if (self->memo != NULL) {
4248 res += sizeof(PyMemoTable);
4249 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4250 }
4251 if (self->output_buffer != NULL) {
4252 s = _PySys_GetSizeOf(self->output_buffer);
4253 if (s == -1)
4254 return -1;
4255 res += s;
4256 }
4257 return res;
4258 }
4259
4260 static struct PyMethodDef Pickler_methods[] = {
4261 _PICKLE_PICKLER_DUMP_METHODDEF
4262 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
4263 _PICKLE_PICKLER___SIZEOF___METHODDEF
4264 {NULL, NULL} /* sentinel */
4265 };
4266
4267 static void
Pickler_dealloc(PicklerObject * self)4268 Pickler_dealloc(PicklerObject *self)
4269 {
4270 PyObject_GC_UnTrack(self);
4271
4272 Py_XDECREF(self->output_buffer);
4273 Py_XDECREF(self->write);
4274 Py_XDECREF(self->pers_func);
4275 Py_XDECREF(self->dispatch_table);
4276 Py_XDECREF(self->fast_memo);
4277
4278 PyMemoTable_Del(self->memo);
4279
4280 Py_TYPE(self)->tp_free((PyObject *)self);
4281 }
4282
4283 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4284 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4285 {
4286 Py_VISIT(self->write);
4287 Py_VISIT(self->pers_func);
4288 Py_VISIT(self->dispatch_table);
4289 Py_VISIT(self->fast_memo);
4290 return 0;
4291 }
4292
4293 static int
Pickler_clear(PicklerObject * self)4294 Pickler_clear(PicklerObject *self)
4295 {
4296 Py_CLEAR(self->output_buffer);
4297 Py_CLEAR(self->write);
4298 Py_CLEAR(self->pers_func);
4299 Py_CLEAR(self->dispatch_table);
4300 Py_CLEAR(self->fast_memo);
4301
4302 if (self->memo != NULL) {
4303 PyMemoTable *memo = self->memo;
4304 self->memo = NULL;
4305 PyMemoTable_Del(memo);
4306 }
4307 return 0;
4308 }
4309
4310
4311 /*[clinic input]
4312
4313 _pickle.Pickler.__init__
4314
4315 file: object
4316 protocol: object = NULL
4317 fix_imports: bool = True
4318
4319 This takes a binary file for writing a pickle data stream.
4320
4321 The optional *protocol* argument tells the pickler to use the given
4322 protocol; supported protocols are 0, 1, 2, 3 and 4. The default
4323 protocol is 3; a backward-incompatible protocol designed for Python 3.
4324
4325 Specifying a negative protocol version selects the highest protocol
4326 version supported. The higher the protocol used, the more recent the
4327 version of Python needed to read the pickle produced.
4328
4329 The *file* argument must have a write() method that accepts a single
4330 bytes argument. It can thus be a file object opened for binary
4331 writing, an io.BytesIO instance, or any other custom object that meets
4332 this interface.
4333
4334 If *fix_imports* is True and protocol is less than 3, pickle will try
4335 to map the new Python 3 names to the old module names used in Python
4336 2, so that the pickle data stream is readable with Python 2.
4337 [clinic start generated code]*/
4338
4339 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports)4340 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4341 PyObject *protocol, int fix_imports)
4342 /*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/
4343 {
4344 _Py_IDENTIFIER(persistent_id);
4345 _Py_IDENTIFIER(dispatch_table);
4346
4347 /* In case of multiple __init__() calls, clear previous content. */
4348 if (self->write != NULL)
4349 (void)Pickler_clear(self);
4350
4351 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4352 return -1;
4353
4354 if (_Pickler_SetOutputStream(self, file) < 0)
4355 return -1;
4356
4357 /* memo and output_buffer may have already been created in _Pickler_New */
4358 if (self->memo == NULL) {
4359 self->memo = PyMemoTable_New();
4360 if (self->memo == NULL)
4361 return -1;
4362 }
4363 self->output_len = 0;
4364 if (self->output_buffer == NULL) {
4365 self->max_output_len = WRITE_BUF_SIZE;
4366 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4367 self->max_output_len);
4368 if (self->output_buffer == NULL)
4369 return -1;
4370 }
4371
4372 self->fast = 0;
4373 self->fast_nesting = 0;
4374 self->fast_memo = NULL;
4375
4376 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4377 &self->pers_func, &self->pers_func_self) < 0)
4378 {
4379 return -1;
4380 }
4381
4382 if (_PyObject_LookupAttrId((PyObject *)self,
4383 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4384 return -1;
4385 }
4386
4387 return 0;
4388 }
4389
4390
4391 /* Define a proxy object for the Pickler's internal memo object. This is to
4392 * avoid breaking code like:
4393 * pickler.memo.clear()
4394 * and
4395 * pickler.memo = saved_memo
4396 * Is this a good idea? Not really, but we don't want to break code that uses
4397 * it. Note that we don't implement the entire mapping API here. This is
4398 * intentional, as these should be treated as black-box implementation details.
4399 */
4400
4401 /*[clinic input]
4402 _pickle.PicklerMemoProxy.clear
4403
4404 Remove all items from memo.
4405 [clinic start generated code]*/
4406
4407 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4408 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4409 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4410 {
4411 if (self->pickler->memo)
4412 PyMemoTable_Clear(self->pickler->memo);
4413 Py_RETURN_NONE;
4414 }
4415
4416 /*[clinic input]
4417 _pickle.PicklerMemoProxy.copy
4418
4419 Copy the memo to a new object.
4420 [clinic start generated code]*/
4421
4422 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4423 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4424 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4425 {
4426 PyMemoTable *memo;
4427 PyObject *new_memo = PyDict_New();
4428 if (new_memo == NULL)
4429 return NULL;
4430
4431 memo = self->pickler->memo;
4432 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4433 PyMemoEntry entry = memo->mt_table[i];
4434 if (entry.me_key != NULL) {
4435 int status;
4436 PyObject *key, *value;
4437
4438 key = PyLong_FromVoidPtr(entry.me_key);
4439 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4440
4441 if (key == NULL || value == NULL) {
4442 Py_XDECREF(key);
4443 Py_XDECREF(value);
4444 goto error;
4445 }
4446 status = PyDict_SetItem(new_memo, key, value);
4447 Py_DECREF(key);
4448 Py_DECREF(value);
4449 if (status < 0)
4450 goto error;
4451 }
4452 }
4453 return new_memo;
4454
4455 error:
4456 Py_XDECREF(new_memo);
4457 return NULL;
4458 }
4459
4460 /*[clinic input]
4461 _pickle.PicklerMemoProxy.__reduce__
4462
4463 Implement pickle support.
4464 [clinic start generated code]*/
4465
4466 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4467 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4468 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4469 {
4470 PyObject *reduce_value, *dict_args;
4471 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4472 if (contents == NULL)
4473 return NULL;
4474
4475 reduce_value = PyTuple_New(2);
4476 if (reduce_value == NULL) {
4477 Py_DECREF(contents);
4478 return NULL;
4479 }
4480 dict_args = PyTuple_New(1);
4481 if (dict_args == NULL) {
4482 Py_DECREF(contents);
4483 Py_DECREF(reduce_value);
4484 return NULL;
4485 }
4486 PyTuple_SET_ITEM(dict_args, 0, contents);
4487 Py_INCREF((PyObject *)&PyDict_Type);
4488 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4489 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4490 return reduce_value;
4491 }
4492
4493 static PyMethodDef picklerproxy_methods[] = {
4494 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4495 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4496 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
4497 {NULL, NULL} /* sentinel */
4498 };
4499
4500 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4501 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4502 {
4503 PyObject_GC_UnTrack(self);
4504 Py_XDECREF(self->pickler);
4505 PyObject_GC_Del((PyObject *)self);
4506 }
4507
4508 static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject * self,visitproc visit,void * arg)4509 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4510 visitproc visit, void *arg)
4511 {
4512 Py_VISIT(self->pickler);
4513 return 0;
4514 }
4515
4516 static int
PicklerMemoProxy_clear(PicklerMemoProxyObject * self)4517 PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4518 {
4519 Py_CLEAR(self->pickler);
4520 return 0;
4521 }
4522
4523 static PyTypeObject PicklerMemoProxyType = {
4524 PyVarObject_HEAD_INIT(NULL, 0)
4525 "_pickle.PicklerMemoProxy", /*tp_name*/
4526 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4527 0,
4528 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4529 0, /* tp_print */
4530 0, /* tp_getattr */
4531 0, /* tp_setattr */
4532 0, /* tp_compare */
4533 0, /* tp_repr */
4534 0, /* tp_as_number */
4535 0, /* tp_as_sequence */
4536 0, /* tp_as_mapping */
4537 PyObject_HashNotImplemented, /* tp_hash */
4538 0, /* tp_call */
4539 0, /* tp_str */
4540 PyObject_GenericGetAttr, /* tp_getattro */
4541 PyObject_GenericSetAttr, /* tp_setattro */
4542 0, /* tp_as_buffer */
4543 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4544 0, /* tp_doc */
4545 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4546 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4547 0, /* tp_richcompare */
4548 0, /* tp_weaklistoffset */
4549 0, /* tp_iter */
4550 0, /* tp_iternext */
4551 picklerproxy_methods, /* tp_methods */
4552 };
4553
4554 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4555 PicklerMemoProxy_New(PicklerObject *pickler)
4556 {
4557 PicklerMemoProxyObject *self;
4558
4559 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4560 if (self == NULL)
4561 return NULL;
4562 Py_INCREF(pickler);
4563 self->pickler = pickler;
4564 PyObject_GC_Track(self);
4565 return (PyObject *)self;
4566 }
4567
4568 /*****************************************************************************/
4569
4570 static PyObject *
Pickler_get_memo(PicklerObject * self,void * Py_UNUSED (ignored))4571 Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
4572 {
4573 return PicklerMemoProxy_New(self);
4574 }
4575
4576 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4577 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4578 {
4579 PyMemoTable *new_memo = NULL;
4580
4581 if (obj == NULL) {
4582 PyErr_SetString(PyExc_TypeError,
4583 "attribute deletion is not supported");
4584 return -1;
4585 }
4586
4587 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4588 PicklerObject *pickler =
4589 ((PicklerMemoProxyObject *)obj)->pickler;
4590
4591 new_memo = PyMemoTable_Copy(pickler->memo);
4592 if (new_memo == NULL)
4593 return -1;
4594 }
4595 else if (PyDict_Check(obj)) {
4596 Py_ssize_t i = 0;
4597 PyObject *key, *value;
4598
4599 new_memo = PyMemoTable_New();
4600 if (new_memo == NULL)
4601 return -1;
4602
4603 while (PyDict_Next(obj, &i, &key, &value)) {
4604 Py_ssize_t memo_id;
4605 PyObject *memo_obj;
4606
4607 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4608 PyErr_SetString(PyExc_TypeError,
4609 "'memo' values must be 2-item tuples");
4610 goto error;
4611 }
4612 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
4613 if (memo_id == -1 && PyErr_Occurred())
4614 goto error;
4615 memo_obj = PyTuple_GET_ITEM(value, 1);
4616 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4617 goto error;
4618 }
4619 }
4620 else {
4621 PyErr_Format(PyExc_TypeError,
4622 "'memo' attribute must be a PicklerMemoProxy object "
4623 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
4624 return -1;
4625 }
4626
4627 PyMemoTable_Del(self->memo);
4628 self->memo = new_memo;
4629
4630 return 0;
4631
4632 error:
4633 if (new_memo)
4634 PyMemoTable_Del(new_memo);
4635 return -1;
4636 }
4637
4638 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))4639 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
4640 {
4641 if (self->pers_func == NULL) {
4642 PyErr_SetString(PyExc_AttributeError, "persistent_id");
4643 return NULL;
4644 }
4645 return reconstruct_method(self->pers_func, self->pers_func_self);
4646 }
4647
4648 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))4649 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
4650 {
4651 if (value == NULL) {
4652 PyErr_SetString(PyExc_TypeError,
4653 "attribute deletion is not supported");
4654 return -1;
4655 }
4656 if (!PyCallable_Check(value)) {
4657 PyErr_SetString(PyExc_TypeError,
4658 "persistent_id must be a callable taking one argument");
4659 return -1;
4660 }
4661
4662 self->pers_func_self = NULL;
4663 Py_INCREF(value);
4664 Py_XSETREF(self->pers_func, value);
4665
4666 return 0;
4667 }
4668
4669 static PyMemberDef Pickler_members[] = {
4670 {"bin", T_INT, offsetof(PicklerObject, bin)},
4671 {"fast", T_INT, offsetof(PicklerObject, fast)},
4672 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
4673 {NULL}
4674 };
4675
4676 static PyGetSetDef Pickler_getsets[] = {
4677 {"memo", (getter)Pickler_get_memo,
4678 (setter)Pickler_set_memo},
4679 {"persistent_id", (getter)Pickler_get_persid,
4680 (setter)Pickler_set_persid},
4681 {NULL}
4682 };
4683
4684 static PyTypeObject Pickler_Type = {
4685 PyVarObject_HEAD_INIT(NULL, 0)
4686 "_pickle.Pickler" , /*tp_name*/
4687 sizeof(PicklerObject), /*tp_basicsize*/
4688 0, /*tp_itemsize*/
4689 (destructor)Pickler_dealloc, /*tp_dealloc*/
4690 0, /*tp_print*/
4691 0, /*tp_getattr*/
4692 0, /*tp_setattr*/
4693 0, /*tp_reserved*/
4694 0, /*tp_repr*/
4695 0, /*tp_as_number*/
4696 0, /*tp_as_sequence*/
4697 0, /*tp_as_mapping*/
4698 0, /*tp_hash*/
4699 0, /*tp_call*/
4700 0, /*tp_str*/
4701 0, /*tp_getattro*/
4702 0, /*tp_setattro*/
4703 0, /*tp_as_buffer*/
4704 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4705 _pickle_Pickler___init____doc__, /*tp_doc*/
4706 (traverseproc)Pickler_traverse, /*tp_traverse*/
4707 (inquiry)Pickler_clear, /*tp_clear*/
4708 0, /*tp_richcompare*/
4709 0, /*tp_weaklistoffset*/
4710 0, /*tp_iter*/
4711 0, /*tp_iternext*/
4712 Pickler_methods, /*tp_methods*/
4713 Pickler_members, /*tp_members*/
4714 Pickler_getsets, /*tp_getset*/
4715 0, /*tp_base*/
4716 0, /*tp_dict*/
4717 0, /*tp_descr_get*/
4718 0, /*tp_descr_set*/
4719 0, /*tp_dictoffset*/
4720 _pickle_Pickler___init__, /*tp_init*/
4721 PyType_GenericAlloc, /*tp_alloc*/
4722 PyType_GenericNew, /*tp_new*/
4723 PyObject_GC_Del, /*tp_free*/
4724 0, /*tp_is_gc*/
4725 };
4726
4727 /* Temporary helper for calling self.find_class().
4728
4729 XXX: It would be nice to able to avoid Python function call overhead, by
4730 using directly the C version of find_class(), when find_class() is not
4731 overridden by a subclass. Although, this could become rather hackish. A
4732 simpler optimization would be to call the C function when self is not a
4733 subclass instance. */
4734 static PyObject *
find_class(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)4735 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4736 {
4737 _Py_IDENTIFIER(find_class);
4738
4739 return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
4740 module_name, global_name, NULL);
4741 }
4742
4743 static Py_ssize_t
marker(UnpicklerObject * self)4744 marker(UnpicklerObject *self)
4745 {
4746 Py_ssize_t mark;
4747
4748 if (self->num_marks < 1) {
4749 PickleState *st = _Pickle_GetGlobalState();
4750 PyErr_SetString(st->UnpicklingError, "could not find MARK");
4751 return -1;
4752 }
4753
4754 mark = self->marks[--self->num_marks];
4755 self->stack->mark_set = self->num_marks != 0;
4756 self->stack->fence = self->num_marks ?
4757 self->marks[self->num_marks - 1] : 0;
4758 return mark;
4759 }
4760
4761 static int
load_none(UnpicklerObject * self)4762 load_none(UnpicklerObject *self)
4763 {
4764 PDATA_APPEND(self->stack, Py_None, -1);
4765 return 0;
4766 }
4767
4768 static int
load_int(UnpicklerObject * self)4769 load_int(UnpicklerObject *self)
4770 {
4771 PyObject *value;
4772 char *endptr, *s;
4773 Py_ssize_t len;
4774 long x;
4775
4776 if ((len = _Unpickler_Readline(self, &s)) < 0)
4777 return -1;
4778 if (len < 2)
4779 return bad_readline();
4780
4781 errno = 0;
4782 /* XXX: Should the base argument of strtol() be explicitly set to 10?
4783 XXX(avassalotti): Should this uses PyOS_strtol()? */
4784 x = strtol(s, &endptr, 0);
4785
4786 if (errno || (*endptr != '\n' && *endptr != '\0')) {
4787 /* Hm, maybe we've got something long. Let's try reading
4788 * it as a Python int object. */
4789 errno = 0;
4790 /* XXX: Same thing about the base here. */
4791 value = PyLong_FromString(s, NULL, 0);
4792 if (value == NULL) {
4793 PyErr_SetString(PyExc_ValueError,
4794 "could not convert string to int");
4795 return -1;
4796 }
4797 }
4798 else {
4799 if (len == 3 && (x == 0 || x == 1)) {
4800 if ((value = PyBool_FromLong(x)) == NULL)
4801 return -1;
4802 }
4803 else {
4804 if ((value = PyLong_FromLong(x)) == NULL)
4805 return -1;
4806 }
4807 }
4808
4809 PDATA_PUSH(self->stack, value, -1);
4810 return 0;
4811 }
4812
4813 static int
load_bool(UnpicklerObject * self,PyObject * boolean)4814 load_bool(UnpicklerObject *self, PyObject *boolean)
4815 {
4816 assert(boolean == Py_True || boolean == Py_False);
4817 PDATA_APPEND(self->stack, boolean, -1);
4818 return 0;
4819 }
4820
4821 /* s contains x bytes of an unsigned little-endian integer. Return its value
4822 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4823 */
4824 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)4825 calc_binsize(char *bytes, int nbytes)
4826 {
4827 unsigned char *s = (unsigned char *)bytes;
4828 int i;
4829 size_t x = 0;
4830
4831 if (nbytes > (int)sizeof(size_t)) {
4832 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
4833 * have 64-bit size that can't be represented on 32-bit platform.
4834 */
4835 for (i = (int)sizeof(size_t); i < nbytes; i++) {
4836 if (s[i])
4837 return -1;
4838 }
4839 nbytes = (int)sizeof(size_t);
4840 }
4841 for (i = 0; i < nbytes; i++) {
4842 x |= (size_t) s[i] << (8 * i);
4843 }
4844
4845 if (x > PY_SSIZE_T_MAX)
4846 return -1;
4847 else
4848 return (Py_ssize_t) x;
4849 }
4850
/* 'bytes' holds 'nbytes' bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The callers visible in this file pass 1, 2 or 4 for nbytes; nbytes must
 * not exceed sizeof(long).
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    /* Accumulate in an unsigned type.  Shifting a set bit into the sign
     * position of a signed long is undefined behaviour, which a signed
     * accumulator hits for 4-byte inputs on platforms with 32-bit longs.
     */
    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: when bit 31 is set the value is ux - 2**32.  Computing it
     * as -(2**32 - 1 - ux) - 1 keeps every intermediate within the range
     * of a 32-bit long, so the result is the same whatever the width of
     * long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
4877
4878 static int
load_binintx(UnpicklerObject * self,char * s,int size)4879 load_binintx(UnpicklerObject *self, char *s, int size)
4880 {
4881 PyObject *value;
4882 long x;
4883
4884 x = calc_binint(s, size);
4885
4886 if ((value = PyLong_FromLong(x)) == NULL)
4887 return -1;
4888
4889 PDATA_PUSH(self->stack, value, -1);
4890 return 0;
4891 }
4892
4893 static int
load_binint(UnpicklerObject * self)4894 load_binint(UnpicklerObject *self)
4895 {
4896 char *s;
4897
4898 if (_Unpickler_Read(self, &s, 4) < 0)
4899 return -1;
4900
4901 return load_binintx(self, s, 4);
4902 }
4903
4904 static int
load_binint1(UnpicklerObject * self)4905 load_binint1(UnpicklerObject *self)
4906 {
4907 char *s;
4908
4909 if (_Unpickler_Read(self, &s, 1) < 0)
4910 return -1;
4911
4912 return load_binintx(self, s, 1);
4913 }
4914
4915 static int
load_binint2(UnpicklerObject * self)4916 load_binint2(UnpicklerObject *self)
4917 {
4918 char *s;
4919
4920 if (_Unpickler_Read(self, &s, 2) < 0)
4921 return -1;
4922
4923 return load_binintx(self, s, 2);
4924 }
4925
4926 static int
load_long(UnpicklerObject * self)4927 load_long(UnpicklerObject *self)
4928 {
4929 PyObject *value;
4930 char *s = NULL;
4931 Py_ssize_t len;
4932
4933 if ((len = _Unpickler_Readline(self, &s)) < 0)
4934 return -1;
4935 if (len < 2)
4936 return bad_readline();
4937
4938 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4939 the 'L' before calling PyLong_FromString. In order to maintain
4940 compatibility with Python 3.0.0, we don't actually *require*
4941 the 'L' to be present. */
4942 if (s[len-2] == 'L')
4943 s[len-2] = '\0';
4944 /* XXX: Should the base argument explicitly set to 10? */
4945 value = PyLong_FromString(s, NULL, 0);
4946 if (value == NULL)
4947 return -1;
4948
4949 PDATA_PUSH(self->stack, value, -1);
4950 return 0;
4951 }
4952
4953 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
4954 * data following.
4955 */
4956 static int
load_counted_long(UnpicklerObject * self,int size)4957 load_counted_long(UnpicklerObject *self, int size)
4958 {
4959 PyObject *value;
4960 char *nbytes;
4961 char *pdata;
4962
4963 assert(size == 1 || size == 4);
4964 if (_Unpickler_Read(self, &nbytes, size) < 0)
4965 return -1;
4966
4967 size = calc_binint(nbytes, size);
4968 if (size < 0) {
4969 PickleState *st = _Pickle_GetGlobalState();
4970 /* Corrupt or hostile pickle -- we never write one like this */
4971 PyErr_SetString(st->UnpicklingError,
4972 "LONG pickle has negative byte count");
4973 return -1;
4974 }
4975
4976 if (size == 0)
4977 value = PyLong_FromLong(0L);
4978 else {
4979 /* Read the raw little-endian bytes and convert. */
4980 if (_Unpickler_Read(self, &pdata, size) < 0)
4981 return -1;
4982 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4983 1 /* little endian */ , 1 /* signed */ );
4984 }
4985 if (value == NULL)
4986 return -1;
4987 PDATA_PUSH(self->stack, value, -1);
4988 return 0;
4989 }
4990
4991 static int
load_float(UnpicklerObject * self)4992 load_float(UnpicklerObject *self)
4993 {
4994 PyObject *value;
4995 char *endptr, *s;
4996 Py_ssize_t len;
4997 double d;
4998
4999 if ((len = _Unpickler_Readline(self, &s)) < 0)
5000 return -1;
5001 if (len < 2)
5002 return bad_readline();
5003
5004 errno = 0;
5005 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5006 if (d == -1.0 && PyErr_Occurred())
5007 return -1;
5008 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5009 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5010 return -1;
5011 }
5012 value = PyFloat_FromDouble(d);
5013 if (value == NULL)
5014 return -1;
5015
5016 PDATA_PUSH(self->stack, value, -1);
5017 return 0;
5018 }
5019
5020 static int
load_binfloat(UnpicklerObject * self)5021 load_binfloat(UnpicklerObject *self)
5022 {
5023 PyObject *value;
5024 double x;
5025 char *s;
5026
5027 if (_Unpickler_Read(self, &s, 8) < 0)
5028 return -1;
5029
5030 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5031 if (x == -1.0 && PyErr_Occurred())
5032 return -1;
5033
5034 if ((value = PyFloat_FromDouble(x)) == NULL)
5035 return -1;
5036
5037 PDATA_PUSH(self->stack, value, -1);
5038 return 0;
5039 }
5040
5041 static int
load_string(UnpicklerObject * self)5042 load_string(UnpicklerObject *self)
5043 {
5044 PyObject *bytes;
5045 PyObject *obj;
5046 Py_ssize_t len;
5047 char *s, *p;
5048
5049 if ((len = _Unpickler_Readline(self, &s)) < 0)
5050 return -1;
5051 /* Strip the newline */
5052 len--;
5053 /* Strip outermost quotes */
5054 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5055 p = s + 1;
5056 len -= 2;
5057 }
5058 else {
5059 PickleState *st = _Pickle_GetGlobalState();
5060 PyErr_SetString(st->UnpicklingError,
5061 "the STRING opcode argument must be quoted");
5062 return -1;
5063 }
5064 assert(len >= 0);
5065
5066 /* Use the PyBytes API to decode the string, since that is what is used
5067 to encode, and then coerce the result to Unicode. */
5068 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5069 if (bytes == NULL)
5070 return -1;
5071
5072 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5073 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5074 if (strcmp(self->encoding, "bytes") == 0) {
5075 obj = bytes;
5076 }
5077 else {
5078 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5079 Py_DECREF(bytes);
5080 if (obj == NULL) {
5081 return -1;
5082 }
5083 }
5084
5085 PDATA_PUSH(self->stack, obj, -1);
5086 return 0;
5087 }
5088
5089 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5090 load_counted_binstring(UnpicklerObject *self, int nbytes)
5091 {
5092 PyObject *obj;
5093 Py_ssize_t size;
5094 char *s;
5095
5096 if (_Unpickler_Read(self, &s, nbytes) < 0)
5097 return -1;
5098
5099 size = calc_binsize(s, nbytes);
5100 if (size < 0) {
5101 PickleState *st = _Pickle_GetGlobalState();
5102 PyErr_Format(st->UnpicklingError,
5103 "BINSTRING exceeds system's maximum size of %zd bytes",
5104 PY_SSIZE_T_MAX);
5105 return -1;
5106 }
5107
5108 if (_Unpickler_Read(self, &s, size) < 0)
5109 return -1;
5110
5111 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5112 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5113 if (strcmp(self->encoding, "bytes") == 0) {
5114 obj = PyBytes_FromStringAndSize(s, size);
5115 }
5116 else {
5117 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5118 }
5119 if (obj == NULL) {
5120 return -1;
5121 }
5122
5123 PDATA_PUSH(self->stack, obj, -1);
5124 return 0;
5125 }
5126
5127 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5128 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5129 {
5130 PyObject *bytes;
5131 Py_ssize_t size;
5132 char *s;
5133
5134 if (_Unpickler_Read(self, &s, nbytes) < 0)
5135 return -1;
5136
5137 size = calc_binsize(s, nbytes);
5138 if (size < 0) {
5139 PyErr_Format(PyExc_OverflowError,
5140 "BINBYTES exceeds system's maximum size of %zd bytes",
5141 PY_SSIZE_T_MAX);
5142 return -1;
5143 }
5144
5145 if (_Unpickler_Read(self, &s, size) < 0)
5146 return -1;
5147
5148 bytes = PyBytes_FromStringAndSize(s, size);
5149 if (bytes == NULL)
5150 return -1;
5151
5152 PDATA_PUSH(self->stack, bytes, -1);
5153 return 0;
5154 }
5155
5156 static int
load_unicode(UnpicklerObject * self)5157 load_unicode(UnpicklerObject *self)
5158 {
5159 PyObject *str;
5160 Py_ssize_t len;
5161 char *s = NULL;
5162
5163 if ((len = _Unpickler_Readline(self, &s)) < 0)
5164 return -1;
5165 if (len < 1)
5166 return bad_readline();
5167
5168 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5169 if (str == NULL)
5170 return -1;
5171
5172 PDATA_PUSH(self->stack, str, -1);
5173 return 0;
5174 }
5175
5176 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5177 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5178 {
5179 PyObject *str;
5180 Py_ssize_t size;
5181 char *s;
5182
5183 if (_Unpickler_Read(self, &s, nbytes) < 0)
5184 return -1;
5185
5186 size = calc_binsize(s, nbytes);
5187 if (size < 0) {
5188 PyErr_Format(PyExc_OverflowError,
5189 "BINUNICODE exceeds system's maximum size of %zd bytes",
5190 PY_SSIZE_T_MAX);
5191 return -1;
5192 }
5193
5194 if (_Unpickler_Read(self, &s, size) < 0)
5195 return -1;
5196
5197 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5198 if (str == NULL)
5199 return -1;
5200
5201 PDATA_PUSH(self->stack, str, -1);
5202 return 0;
5203 }
5204
5205 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5206 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5207 {
5208 PyObject *tuple;
5209
5210 if (Py_SIZE(self->stack) < len)
5211 return Pdata_stack_underflow(self->stack);
5212
5213 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5214 if (tuple == NULL)
5215 return -1;
5216 PDATA_PUSH(self->stack, tuple, -1);
5217 return 0;
5218 }
5219
5220 static int
load_tuple(UnpicklerObject * self)5221 load_tuple(UnpicklerObject *self)
5222 {
5223 Py_ssize_t i;
5224
5225 if ((i = marker(self)) < 0)
5226 return -1;
5227
5228 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5229 }
5230
5231 static int
load_empty_list(UnpicklerObject * self)5232 load_empty_list(UnpicklerObject *self)
5233 {
5234 PyObject *list;
5235
5236 if ((list = PyList_New(0)) == NULL)
5237 return -1;
5238 PDATA_PUSH(self->stack, list, -1);
5239 return 0;
5240 }
5241
5242 static int
load_empty_dict(UnpicklerObject * self)5243 load_empty_dict(UnpicklerObject *self)
5244 {
5245 PyObject *dict;
5246
5247 if ((dict = PyDict_New()) == NULL)
5248 return -1;
5249 PDATA_PUSH(self->stack, dict, -1);
5250 return 0;
5251 }
5252
5253 static int
load_empty_set(UnpicklerObject * self)5254 load_empty_set(UnpicklerObject *self)
5255 {
5256 PyObject *set;
5257
5258 if ((set = PySet_New(NULL)) == NULL)
5259 return -1;
5260 PDATA_PUSH(self->stack, set, -1);
5261 return 0;
5262 }
5263
5264 static int
load_list(UnpicklerObject * self)5265 load_list(UnpicklerObject *self)
5266 {
5267 PyObject *list;
5268 Py_ssize_t i;
5269
5270 if ((i = marker(self)) < 0)
5271 return -1;
5272
5273 list = Pdata_poplist(self->stack, i);
5274 if (list == NULL)
5275 return -1;
5276 PDATA_PUSH(self->stack, list, -1);
5277 return 0;
5278 }
5279
5280 static int
load_dict(UnpicklerObject * self)5281 load_dict(UnpicklerObject *self)
5282 {
5283 PyObject *dict, *key, *value;
5284 Py_ssize_t i, j, k;
5285
5286 if ((i = marker(self)) < 0)
5287 return -1;
5288 j = Py_SIZE(self->stack);
5289
5290 if ((dict = PyDict_New()) == NULL)
5291 return -1;
5292
5293 if ((j - i) % 2 != 0) {
5294 PickleState *st = _Pickle_GetGlobalState();
5295 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5296 Py_DECREF(dict);
5297 return -1;
5298 }
5299
5300 for (k = i + 1; k < j; k += 2) {
5301 key = self->stack->data[k - 1];
5302 value = self->stack->data[k];
5303 if (PyDict_SetItem(dict, key, value) < 0) {
5304 Py_DECREF(dict);
5305 return -1;
5306 }
5307 }
5308 Pdata_clear(self->stack, i);
5309 PDATA_PUSH(self->stack, dict, -1);
5310 return 0;
5311 }
5312
5313 static int
load_frozenset(UnpicklerObject * self)5314 load_frozenset(UnpicklerObject *self)
5315 {
5316 PyObject *items;
5317 PyObject *frozenset;
5318 Py_ssize_t i;
5319
5320 if ((i = marker(self)) < 0)
5321 return -1;
5322
5323 items = Pdata_poptuple(self->stack, i);
5324 if (items == NULL)
5325 return -1;
5326
5327 frozenset = PyFrozenSet_New(items);
5328 Py_DECREF(items);
5329 if (frozenset == NULL)
5330 return -1;
5331
5332 PDATA_PUSH(self->stack, frozenset, -1);
5333 return 0;
5334 }
5335
5336 static PyObject *
instantiate(PyObject * cls,PyObject * args)5337 instantiate(PyObject *cls, PyObject *args)
5338 {
5339 /* Caller must assure args are a tuple. Normally, args come from
5340 Pdata_poptuple which packs objects from the top of the stack
5341 into a newly created tuple. */
5342 assert(PyTuple_Check(args));
5343 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5344 _Py_IDENTIFIER(__getinitargs__);
5345 _Py_IDENTIFIER(__new__);
5346 PyObject *func;
5347 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5348 return NULL;
5349 }
5350 if (func == NULL) {
5351 return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5352 }
5353 Py_DECREF(func);
5354 }
5355 return PyObject_CallObject(cls, args);
5356 }
5357
5358 static int
load_obj(UnpicklerObject * self)5359 load_obj(UnpicklerObject *self)
5360 {
5361 PyObject *cls, *args, *obj = NULL;
5362 Py_ssize_t i;
5363
5364 if ((i = marker(self)) < 0)
5365 return -1;
5366
5367 if (Py_SIZE(self->stack) - i < 1)
5368 return Pdata_stack_underflow(self->stack);
5369
5370 args = Pdata_poptuple(self->stack, i + 1);
5371 if (args == NULL)
5372 return -1;
5373
5374 PDATA_POP(self->stack, cls);
5375 if (cls) {
5376 obj = instantiate(cls, args);
5377 Py_DECREF(cls);
5378 }
5379 Py_DECREF(args);
5380 if (obj == NULL)
5381 return -1;
5382
5383 PDATA_PUSH(self->stack, obj, -1);
5384 return 0;
5385 }
5386
5387 static int
load_inst(UnpicklerObject * self)5388 load_inst(UnpicklerObject *self)
5389 {
5390 PyObject *cls = NULL;
5391 PyObject *args = NULL;
5392 PyObject *obj = NULL;
5393 PyObject *module_name;
5394 PyObject *class_name;
5395 Py_ssize_t len;
5396 Py_ssize_t i;
5397 char *s;
5398
5399 if ((i = marker(self)) < 0)
5400 return -1;
5401 if ((len = _Unpickler_Readline(self, &s)) < 0)
5402 return -1;
5403 if (len < 2)
5404 return bad_readline();
5405
5406 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5407 identifiers are permitted in Python 3.0, since the INST opcode is only
5408 supported by older protocols on Python 2.x. */
5409 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5410 if (module_name == NULL)
5411 return -1;
5412
5413 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5414 if (len < 2) {
5415 Py_DECREF(module_name);
5416 return bad_readline();
5417 }
5418 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5419 if (class_name != NULL) {
5420 cls = find_class(self, module_name, class_name);
5421 Py_DECREF(class_name);
5422 }
5423 }
5424 Py_DECREF(module_name);
5425
5426 if (cls == NULL)
5427 return -1;
5428
5429 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5430 obj = instantiate(cls, args);
5431 Py_DECREF(args);
5432 }
5433 Py_DECREF(cls);
5434
5435 if (obj == NULL)
5436 return -1;
5437
5438 PDATA_PUSH(self->stack, obj, -1);
5439 return 0;
5440 }
5441
5442 static int
load_newobj(UnpicklerObject * self)5443 load_newobj(UnpicklerObject *self)
5444 {
5445 PyObject *args = NULL;
5446 PyObject *clsraw = NULL;
5447 PyTypeObject *cls; /* clsraw cast to its true type */
5448 PyObject *obj;
5449 PickleState *st = _Pickle_GetGlobalState();
5450
5451 /* Stack is ... cls argtuple, and we want to call
5452 * cls.__new__(cls, *argtuple).
5453 */
5454 PDATA_POP(self->stack, args);
5455 if (args == NULL)
5456 goto error;
5457 if (!PyTuple_Check(args)) {
5458 PyErr_SetString(st->UnpicklingError,
5459 "NEWOBJ expected an arg " "tuple.");
5460 goto error;
5461 }
5462
5463 PDATA_POP(self->stack, clsraw);
5464 cls = (PyTypeObject *)clsraw;
5465 if (cls == NULL)
5466 goto error;
5467 if (!PyType_Check(cls)) {
5468 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5469 "isn't a type object");
5470 goto error;
5471 }
5472 if (cls->tp_new == NULL) {
5473 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5474 "has NULL tp_new");
5475 goto error;
5476 }
5477
5478 /* Call __new__. */
5479 obj = cls->tp_new(cls, args, NULL);
5480 if (obj == NULL)
5481 goto error;
5482
5483 Py_DECREF(args);
5484 Py_DECREF(clsraw);
5485 PDATA_PUSH(self->stack, obj, -1);
5486 return 0;
5487
5488 error:
5489 Py_XDECREF(args);
5490 Py_XDECREF(clsraw);
5491 return -1;
5492 }
5493
5494 static int
load_newobj_ex(UnpicklerObject * self)5495 load_newobj_ex(UnpicklerObject *self)
5496 {
5497 PyObject *cls, *args, *kwargs;
5498 PyObject *obj;
5499 PickleState *st = _Pickle_GetGlobalState();
5500
5501 PDATA_POP(self->stack, kwargs);
5502 if (kwargs == NULL) {
5503 return -1;
5504 }
5505 PDATA_POP(self->stack, args);
5506 if (args == NULL) {
5507 Py_DECREF(kwargs);
5508 return -1;
5509 }
5510 PDATA_POP(self->stack, cls);
5511 if (cls == NULL) {
5512 Py_DECREF(kwargs);
5513 Py_DECREF(args);
5514 return -1;
5515 }
5516
5517 if (!PyType_Check(cls)) {
5518 PyErr_Format(st->UnpicklingError,
5519 "NEWOBJ_EX class argument must be a type, not %.200s",
5520 Py_TYPE(cls)->tp_name);
5521 goto error;
5522 }
5523
5524 if (((PyTypeObject *)cls)->tp_new == NULL) {
5525 PyErr_SetString(st->UnpicklingError,
5526 "NEWOBJ_EX class argument doesn't have __new__");
5527 goto error;
5528 }
5529 if (!PyTuple_Check(args)) {
5530 PyErr_Format(st->UnpicklingError,
5531 "NEWOBJ_EX args argument must be a tuple, not %.200s",
5532 Py_TYPE(args)->tp_name);
5533 goto error;
5534 }
5535 if (!PyDict_Check(kwargs)) {
5536 PyErr_Format(st->UnpicklingError,
5537 "NEWOBJ_EX kwargs argument must be a dict, not %.200s",
5538 Py_TYPE(kwargs)->tp_name);
5539 goto error;
5540 }
5541
5542 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5543 Py_DECREF(kwargs);
5544 Py_DECREF(args);
5545 Py_DECREF(cls);
5546 if (obj == NULL) {
5547 return -1;
5548 }
5549 PDATA_PUSH(self->stack, obj, -1);
5550 return 0;
5551
5552 error:
5553 Py_DECREF(kwargs);
5554 Py_DECREF(args);
5555 Py_DECREF(cls);
5556 return -1;
5557 }
5558
5559 static int
load_global(UnpicklerObject * self)5560 load_global(UnpicklerObject *self)
5561 {
5562 PyObject *global = NULL;
5563 PyObject *module_name;
5564 PyObject *global_name;
5565 Py_ssize_t len;
5566 char *s;
5567
5568 if ((len = _Unpickler_Readline(self, &s)) < 0)
5569 return -1;
5570 if (len < 2)
5571 return bad_readline();
5572 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5573 if (!module_name)
5574 return -1;
5575
5576 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5577 if (len < 2) {
5578 Py_DECREF(module_name);
5579 return bad_readline();
5580 }
5581 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5582 if (global_name) {
5583 global = find_class(self, module_name, global_name);
5584 Py_DECREF(global_name);
5585 }
5586 }
5587 Py_DECREF(module_name);
5588
5589 if (global == NULL)
5590 return -1;
5591 PDATA_PUSH(self->stack, global, -1);
5592 return 0;
5593 }
5594
5595 static int
load_stack_global(UnpicklerObject * self)5596 load_stack_global(UnpicklerObject *self)
5597 {
5598 PyObject *global;
5599 PyObject *module_name;
5600 PyObject *global_name;
5601
5602 PDATA_POP(self->stack, global_name);
5603 PDATA_POP(self->stack, module_name);
5604 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5605 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5606 PickleState *st = _Pickle_GetGlobalState();
5607 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
5608 Py_XDECREF(global_name);
5609 Py_XDECREF(module_name);
5610 return -1;
5611 }
5612 global = find_class(self, module_name, global_name);
5613 Py_DECREF(global_name);
5614 Py_DECREF(module_name);
5615 if (global == NULL)
5616 return -1;
5617 PDATA_PUSH(self->stack, global, -1);
5618 return 0;
5619 }
5620
5621 static int
load_persid(UnpicklerObject * self)5622 load_persid(UnpicklerObject *self)
5623 {
5624 PyObject *pid, *obj;
5625 Py_ssize_t len;
5626 char *s;
5627
5628 if (self->pers_func) {
5629 if ((len = _Unpickler_Readline(self, &s)) < 0)
5630 return -1;
5631 if (len < 1)
5632 return bad_readline();
5633
5634 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
5635 if (pid == NULL) {
5636 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
5637 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
5638 "persistent IDs in protocol 0 must be "
5639 "ASCII strings");
5640 }
5641 return -1;
5642 }
5643
5644 obj = call_method(self->pers_func, self->pers_func_self, pid);
5645 Py_DECREF(pid);
5646 if (obj == NULL)
5647 return -1;
5648
5649 PDATA_PUSH(self->stack, obj, -1);
5650 return 0;
5651 }
5652 else {
5653 PickleState *st = _Pickle_GetGlobalState();
5654 PyErr_SetString(st->UnpicklingError,
5655 "A load persistent id instruction was encountered,\n"
5656 "but no persistent_load function was specified.");
5657 return -1;
5658 }
5659 }
5660
5661 static int
load_binpersid(UnpicklerObject * self)5662 load_binpersid(UnpicklerObject *self)
5663 {
5664 PyObject *pid, *obj;
5665
5666 if (self->pers_func) {
5667 PDATA_POP(self->stack, pid);
5668 if (pid == NULL)
5669 return -1;
5670
5671 obj = call_method(self->pers_func, self->pers_func_self, pid);
5672 Py_DECREF(pid);
5673 if (obj == NULL)
5674 return -1;
5675
5676 PDATA_PUSH(self->stack, obj, -1);
5677 return 0;
5678 }
5679 else {
5680 PickleState *st = _Pickle_GetGlobalState();
5681 PyErr_SetString(st->UnpicklingError,
5682 "A load persistent id instruction was encountered,\n"
5683 "but no persistent_load function was specified.");
5684 return -1;
5685 }
5686 }
5687
5688 static int
load_pop(UnpicklerObject * self)5689 load_pop(UnpicklerObject *self)
5690 {
5691 Py_ssize_t len = Py_SIZE(self->stack);
5692
5693 /* Note that we split the (pickle.py) stack into two stacks,
5694 * an object stack and a mark stack. We have to be clever and
5695 * pop the right one. We do this by looking at the top of the
5696 * mark stack first, and only signalling a stack underflow if
5697 * the object stack is empty and the mark stack doesn't match
5698 * our expectations.
5699 */
5700 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
5701 self->num_marks--;
5702 self->stack->mark_set = self->num_marks != 0;
5703 self->stack->fence = self->num_marks ?
5704 self->marks[self->num_marks - 1] : 0;
5705 } else if (len <= self->stack->fence)
5706 return Pdata_stack_underflow(self->stack);
5707 else {
5708 len--;
5709 Py_DECREF(self->stack->data[len]);
5710 Py_SIZE(self->stack) = len;
5711 }
5712 return 0;
5713 }
5714
5715 static int
load_pop_mark(UnpicklerObject * self)5716 load_pop_mark(UnpicklerObject *self)
5717 {
5718 Py_ssize_t i;
5719
5720 if ((i = marker(self)) < 0)
5721 return -1;
5722
5723 Pdata_clear(self->stack, i);
5724
5725 return 0;
5726 }
5727
5728 static int
load_dup(UnpicklerObject * self)5729 load_dup(UnpicklerObject *self)
5730 {
5731 PyObject *last;
5732 Py_ssize_t len = Py_SIZE(self->stack);
5733
5734 if (len <= self->stack->fence)
5735 return Pdata_stack_underflow(self->stack);
5736 last = self->stack->data[len - 1];
5737 PDATA_APPEND(self->stack, last, -1);
5738 return 0;
5739 }
5740
5741 static int
load_get(UnpicklerObject * self)5742 load_get(UnpicklerObject *self)
5743 {
5744 PyObject *key, *value;
5745 Py_ssize_t idx;
5746 Py_ssize_t len;
5747 char *s;
5748
5749 if ((len = _Unpickler_Readline(self, &s)) < 0)
5750 return -1;
5751 if (len < 2)
5752 return bad_readline();
5753
5754 key = PyLong_FromString(s, NULL, 10);
5755 if (key == NULL)
5756 return -1;
5757 idx = PyLong_AsSsize_t(key);
5758 if (idx == -1 && PyErr_Occurred()) {
5759 Py_DECREF(key);
5760 return -1;
5761 }
5762
5763 value = _Unpickler_MemoGet(self, idx);
5764 if (value == NULL) {
5765 if (!PyErr_Occurred())
5766 PyErr_SetObject(PyExc_KeyError, key);
5767 Py_DECREF(key);
5768 return -1;
5769 }
5770 Py_DECREF(key);
5771
5772 PDATA_APPEND(self->stack, value, -1);
5773 return 0;
5774 }
5775
5776 static int
load_binget(UnpicklerObject * self)5777 load_binget(UnpicklerObject *self)
5778 {
5779 PyObject *value;
5780 Py_ssize_t idx;
5781 char *s;
5782
5783 if (_Unpickler_Read(self, &s, 1) < 0)
5784 return -1;
5785
5786 idx = Py_CHARMASK(s[0]);
5787
5788 value = _Unpickler_MemoGet(self, idx);
5789 if (value == NULL) {
5790 PyObject *key = PyLong_FromSsize_t(idx);
5791 if (key != NULL) {
5792 PyErr_SetObject(PyExc_KeyError, key);
5793 Py_DECREF(key);
5794 }
5795 return -1;
5796 }
5797
5798 PDATA_APPEND(self->stack, value, -1);
5799 return 0;
5800 }
5801
5802 static int
load_long_binget(UnpicklerObject * self)5803 load_long_binget(UnpicklerObject *self)
5804 {
5805 PyObject *value;
5806 Py_ssize_t idx;
5807 char *s;
5808
5809 if (_Unpickler_Read(self, &s, 4) < 0)
5810 return -1;
5811
5812 idx = calc_binsize(s, 4);
5813
5814 value = _Unpickler_MemoGet(self, idx);
5815 if (value == NULL) {
5816 PyObject *key = PyLong_FromSsize_t(idx);
5817 if (key != NULL) {
5818 PyErr_SetObject(PyExc_KeyError, key);
5819 Py_DECREF(key);
5820 }
5821 return -1;
5822 }
5823
5824 PDATA_APPEND(self->stack, value, -1);
5825 return 0;
5826 }
5827
5828 /* Push an object from the extension registry (EXT[124]). nbytes is
5829 * the number of bytes following the opcode, holding the index (code) value.
5830 */
5831 static int
load_extension(UnpicklerObject * self,int nbytes)5832 load_extension(UnpicklerObject *self, int nbytes)
5833 {
5834 char *codebytes; /* the nbytes bytes after the opcode */
5835 long code; /* calc_binint returns long */
5836 PyObject *py_code; /* code as a Python int */
5837 PyObject *obj; /* the object to push */
5838 PyObject *pair; /* (module_name, class_name) */
5839 PyObject *module_name, *class_name;
5840 PickleState *st = _Pickle_GetGlobalState();
5841
5842 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
5843 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
5844 return -1;
5845 code = calc_binint(codebytes, nbytes);
5846 if (code <= 0) { /* note that 0 is forbidden */
5847 /* Corrupt or hostile pickle. */
5848 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
5849 return -1;
5850 }
5851
5852 /* Look for the code in the cache. */
5853 py_code = PyLong_FromLong(code);
5854 if (py_code == NULL)
5855 return -1;
5856 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
5857 if (obj != NULL) {
5858 /* Bingo. */
5859 Py_DECREF(py_code);
5860 PDATA_APPEND(self->stack, obj, -1);
5861 return 0;
5862 }
5863 if (PyErr_Occurred()) {
5864 Py_DECREF(py_code);
5865 return -1;
5866 }
5867
5868 /* Look up the (module_name, class_name) pair. */
5869 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
5870 if (pair == NULL) {
5871 Py_DECREF(py_code);
5872 if (!PyErr_Occurred()) {
5873 PyErr_Format(PyExc_ValueError, "unregistered extension "
5874 "code %ld", code);
5875 }
5876 return -1;
5877 }
5878 /* Since the extension registry is manipulable via Python code,
5879 * confirm that pair is really a 2-tuple of strings.
5880 */
5881 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5882 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5883 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5884 Py_DECREF(py_code);
5885 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5886 "isn't a 2-tuple of strings", code);
5887 return -1;
5888 }
5889 /* Load the object. */
5890 obj = find_class(self, module_name, class_name);
5891 if (obj == NULL) {
5892 Py_DECREF(py_code);
5893 return -1;
5894 }
5895 /* Cache code -> obj. */
5896 code = PyDict_SetItem(st->extension_cache, py_code, obj);
5897 Py_DECREF(py_code);
5898 if (code < 0) {
5899 Py_DECREF(obj);
5900 return -1;
5901 }
5902 PDATA_PUSH(self->stack, obj, -1);
5903 return 0;
5904 }
5905
5906 static int
load_put(UnpicklerObject * self)5907 load_put(UnpicklerObject *self)
5908 {
5909 PyObject *key, *value;
5910 Py_ssize_t idx;
5911 Py_ssize_t len;
5912 char *s = NULL;
5913
5914 if ((len = _Unpickler_Readline(self, &s)) < 0)
5915 return -1;
5916 if (len < 2)
5917 return bad_readline();
5918 if (Py_SIZE(self->stack) <= self->stack->fence)
5919 return Pdata_stack_underflow(self->stack);
5920 value = self->stack->data[Py_SIZE(self->stack) - 1];
5921
5922 key = PyLong_FromString(s, NULL, 10);
5923 if (key == NULL)
5924 return -1;
5925 idx = PyLong_AsSsize_t(key);
5926 Py_DECREF(key);
5927 if (idx < 0) {
5928 if (!PyErr_Occurred())
5929 PyErr_SetString(PyExc_ValueError,
5930 "negative PUT argument");
5931 return -1;
5932 }
5933
5934 return _Unpickler_MemoPut(self, idx, value);
5935 }
5936
5937 static int
load_binput(UnpicklerObject * self)5938 load_binput(UnpicklerObject *self)
5939 {
5940 PyObject *value;
5941 Py_ssize_t idx;
5942 char *s;
5943
5944 if (_Unpickler_Read(self, &s, 1) < 0)
5945 return -1;
5946
5947 if (Py_SIZE(self->stack) <= self->stack->fence)
5948 return Pdata_stack_underflow(self->stack);
5949 value = self->stack->data[Py_SIZE(self->stack) - 1];
5950
5951 idx = Py_CHARMASK(s[0]);
5952
5953 return _Unpickler_MemoPut(self, idx, value);
5954 }
5955
5956 static int
load_long_binput(UnpicklerObject * self)5957 load_long_binput(UnpicklerObject *self)
5958 {
5959 PyObject *value;
5960 Py_ssize_t idx;
5961 char *s;
5962
5963 if (_Unpickler_Read(self, &s, 4) < 0)
5964 return -1;
5965
5966 if (Py_SIZE(self->stack) <= self->stack->fence)
5967 return Pdata_stack_underflow(self->stack);
5968 value = self->stack->data[Py_SIZE(self->stack) - 1];
5969
5970 idx = calc_binsize(s, 4);
5971 if (idx < 0) {
5972 PyErr_SetString(PyExc_ValueError,
5973 "negative LONG_BINPUT argument");
5974 return -1;
5975 }
5976
5977 return _Unpickler_MemoPut(self, idx, value);
5978 }
5979
5980 static int
load_memoize(UnpicklerObject * self)5981 load_memoize(UnpicklerObject *self)
5982 {
5983 PyObject *value;
5984
5985 if (Py_SIZE(self->stack) <= self->stack->fence)
5986 return Pdata_stack_underflow(self->stack);
5987 value = self->stack->data[Py_SIZE(self->stack) - 1];
5988
5989 return _Unpickler_MemoPut(self, self->memo_len, value);
5990 }
5991
5992 static int
do_append(UnpicklerObject * self,Py_ssize_t x)5993 do_append(UnpicklerObject *self, Py_ssize_t x)
5994 {
5995 PyObject *value;
5996 PyObject *slice;
5997 PyObject *list;
5998 PyObject *result;
5999 Py_ssize_t len, i;
6000
6001 len = Py_SIZE(self->stack);
6002 if (x > len || x <= self->stack->fence)
6003 return Pdata_stack_underflow(self->stack);
6004 if (len == x) /* nothing to do */
6005 return 0;
6006
6007 list = self->stack->data[x - 1];
6008
6009 if (PyList_CheckExact(list)) {
6010 Py_ssize_t list_len;
6011 int ret;
6012
6013 slice = Pdata_poplist(self->stack, x);
6014 if (!slice)
6015 return -1;
6016 list_len = PyList_GET_SIZE(list);
6017 ret = PyList_SetSlice(list, list_len, list_len, slice);
6018 Py_DECREF(slice);
6019 return ret;
6020 }
6021 else {
6022 PyObject *extend_func;
6023 _Py_IDENTIFIER(extend);
6024
6025 extend_func = _PyObject_GetAttrId(list, &PyId_extend);
6026 if (extend_func != NULL) {
6027 slice = Pdata_poplist(self->stack, x);
6028 if (!slice) {
6029 Py_DECREF(extend_func);
6030 return -1;
6031 }
6032 result = _Pickle_FastCall(extend_func, slice);
6033 Py_DECREF(extend_func);
6034 if (result == NULL)
6035 return -1;
6036 Py_DECREF(result);
6037 }
6038 else {
6039 PyObject *append_func;
6040 _Py_IDENTIFIER(append);
6041
6042 /* Even if the PEP 307 requires extend() and append() methods,
6043 fall back on append() if the object has no extend() method
6044 for backward compatibility. */
6045 PyErr_Clear();
6046 append_func = _PyObject_GetAttrId(list, &PyId_append);
6047 if (append_func == NULL)
6048 return -1;
6049 for (i = x; i < len; i++) {
6050 value = self->stack->data[i];
6051 result = _Pickle_FastCall(append_func, value);
6052 if (result == NULL) {
6053 Pdata_clear(self->stack, i + 1);
6054 Py_SIZE(self->stack) = x;
6055 Py_DECREF(append_func);
6056 return -1;
6057 }
6058 Py_DECREF(result);
6059 }
6060 Py_SIZE(self->stack) = x;
6061 Py_DECREF(append_func);
6062 }
6063 }
6064
6065 return 0;
6066 }
6067
6068 static int
load_append(UnpicklerObject * self)6069 load_append(UnpicklerObject *self)
6070 {
6071 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6072 return Pdata_stack_underflow(self->stack);
6073 return do_append(self, Py_SIZE(self->stack) - 1);
6074 }
6075
6076 static int
load_appends(UnpicklerObject * self)6077 load_appends(UnpicklerObject *self)
6078 {
6079 Py_ssize_t i = marker(self);
6080 if (i < 0)
6081 return -1;
6082 return do_append(self, i);
6083 }
6084
6085 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6086 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6087 {
6088 PyObject *value, *key;
6089 PyObject *dict;
6090 Py_ssize_t len, i;
6091 int status = 0;
6092
6093 len = Py_SIZE(self->stack);
6094 if (x > len || x <= self->stack->fence)
6095 return Pdata_stack_underflow(self->stack);
6096 if (len == x) /* nothing to do */
6097 return 0;
6098 if ((len - x) % 2 != 0) {
6099 PickleState *st = _Pickle_GetGlobalState();
6100 /* Currupt or hostile pickle -- we never write one like this. */
6101 PyErr_SetString(st->UnpicklingError,
6102 "odd number of items for SETITEMS");
6103 return -1;
6104 }
6105
6106 /* Here, dict does not actually need to be a PyDict; it could be anything
6107 that supports the __setitem__ attribute. */
6108 dict = self->stack->data[x - 1];
6109
6110 for (i = x + 1; i < len; i += 2) {
6111 key = self->stack->data[i - 1];
6112 value = self->stack->data[i];
6113 if (PyObject_SetItem(dict, key, value) < 0) {
6114 status = -1;
6115 break;
6116 }
6117 }
6118
6119 Pdata_clear(self->stack, x);
6120 return status;
6121 }
6122
6123 static int
load_setitem(UnpicklerObject * self)6124 load_setitem(UnpicklerObject *self)
6125 {
6126 return do_setitems(self, Py_SIZE(self->stack) - 2);
6127 }
6128
6129 static int
load_setitems(UnpicklerObject * self)6130 load_setitems(UnpicklerObject *self)
6131 {
6132 Py_ssize_t i = marker(self);
6133 if (i < 0)
6134 return -1;
6135 return do_setitems(self, i);
6136 }
6137
6138 static int
load_additems(UnpicklerObject * self)6139 load_additems(UnpicklerObject *self)
6140 {
6141 PyObject *set;
6142 Py_ssize_t mark, len, i;
6143
6144 mark = marker(self);
6145 if (mark < 0)
6146 return -1;
6147 len = Py_SIZE(self->stack);
6148 if (mark > len || mark <= self->stack->fence)
6149 return Pdata_stack_underflow(self->stack);
6150 if (len == mark) /* nothing to do */
6151 return 0;
6152
6153 set = self->stack->data[mark - 1];
6154
6155 if (PySet_Check(set)) {
6156 PyObject *items;
6157 int status;
6158
6159 items = Pdata_poptuple(self->stack, mark);
6160 if (items == NULL)
6161 return -1;
6162
6163 status = _PySet_Update(set, items);
6164 Py_DECREF(items);
6165 return status;
6166 }
6167 else {
6168 PyObject *add_func;
6169 _Py_IDENTIFIER(add);
6170
6171 add_func = _PyObject_GetAttrId(set, &PyId_add);
6172 if (add_func == NULL)
6173 return -1;
6174 for (i = mark; i < len; i++) {
6175 PyObject *result;
6176 PyObject *item;
6177
6178 item = self->stack->data[i];
6179 result = _Pickle_FastCall(add_func, item);
6180 if (result == NULL) {
6181 Pdata_clear(self->stack, i + 1);
6182 Py_SIZE(self->stack) = mark;
6183 return -1;
6184 }
6185 Py_DECREF(result);
6186 }
6187 Py_SIZE(self->stack) = mark;
6188 }
6189
6190 return 0;
6191 }
6192
6193 static int
load_build(UnpicklerObject * self)6194 load_build(UnpicklerObject *self)
6195 {
6196 PyObject *state, *inst, *slotstate;
6197 PyObject *setstate;
6198 int status = 0;
6199 _Py_IDENTIFIER(__setstate__);
6200
6201 /* Stack is ... instance, state. We want to leave instance at
6202 * the stack top, possibly mutated via instance.__setstate__(state).
6203 */
6204 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6205 return Pdata_stack_underflow(self->stack);
6206
6207 PDATA_POP(self->stack, state);
6208 if (state == NULL)
6209 return -1;
6210
6211 inst = self->stack->data[Py_SIZE(self->stack) - 1];
6212
6213 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6214 Py_DECREF(state);
6215 return -1;
6216 }
6217 if (setstate != NULL) {
6218 PyObject *result;
6219
6220 /* The explicit __setstate__ is responsible for everything. */
6221 result = _Pickle_FastCall(setstate, state);
6222 Py_DECREF(setstate);
6223 if (result == NULL)
6224 return -1;
6225 Py_DECREF(result);
6226 return 0;
6227 }
6228
6229 /* A default __setstate__. First see whether state embeds a
6230 * slot state dict too (a proto 2 addition).
6231 */
6232 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6233 PyObject *tmp = state;
6234
6235 state = PyTuple_GET_ITEM(tmp, 0);
6236 slotstate = PyTuple_GET_ITEM(tmp, 1);
6237 Py_INCREF(state);
6238 Py_INCREF(slotstate);
6239 Py_DECREF(tmp);
6240 }
6241 else
6242 slotstate = NULL;
6243
6244 /* Set inst.__dict__ from the state dict (if any). */
6245 if (state != Py_None) {
6246 PyObject *dict;
6247 PyObject *d_key, *d_value;
6248 Py_ssize_t i;
6249 _Py_IDENTIFIER(__dict__);
6250
6251 if (!PyDict_Check(state)) {
6252 PickleState *st = _Pickle_GetGlobalState();
6253 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6254 goto error;
6255 }
6256 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6257 if (dict == NULL)
6258 goto error;
6259
6260 i = 0;
6261 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6262 /* normally the keys for instance attributes are
6263 interned. we should try to do that here. */
6264 Py_INCREF(d_key);
6265 if (PyUnicode_CheckExact(d_key))
6266 PyUnicode_InternInPlace(&d_key);
6267 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6268 Py_DECREF(d_key);
6269 goto error;
6270 }
6271 Py_DECREF(d_key);
6272 }
6273 Py_DECREF(dict);
6274 }
6275
6276 /* Also set instance attributes from the slotstate dict (if any). */
6277 if (slotstate != NULL) {
6278 PyObject *d_key, *d_value;
6279 Py_ssize_t i;
6280
6281 if (!PyDict_Check(slotstate)) {
6282 PickleState *st = _Pickle_GetGlobalState();
6283 PyErr_SetString(st->UnpicklingError,
6284 "slot state is not a dictionary");
6285 goto error;
6286 }
6287 i = 0;
6288 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6289 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6290 goto error;
6291 }
6292 }
6293
6294 if (0) {
6295 error:
6296 status = -1;
6297 }
6298
6299 Py_DECREF(state);
6300 Py_XDECREF(slotstate);
6301 return status;
6302 }
6303
6304 static int
load_mark(UnpicklerObject * self)6305 load_mark(UnpicklerObject *self)
6306 {
6307
6308 /* Note that we split the (pickle.py) stack into two stacks, an
6309 * object stack and a mark stack. Here we push a mark onto the
6310 * mark stack.
6311 */
6312
6313 if ((self->num_marks + 1) >= self->marks_size) {
6314 size_t alloc;
6315
6316 /* Use the size_t type to check for overflow. */
6317 alloc = ((size_t)self->num_marks << 1) + 20;
6318 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
6319 alloc <= ((size_t)self->num_marks + 1)) {
6320 PyErr_NoMemory();
6321 return -1;
6322 }
6323
6324 Py_ssize_t *marks_old = self->marks;
6325 PyMem_RESIZE(self->marks, Py_ssize_t, alloc);
6326 if (self->marks == NULL) {
6327 PyMem_FREE(marks_old);
6328 self->marks_size = 0;
6329 PyErr_NoMemory();
6330 return -1;
6331 }
6332 self->marks_size = (Py_ssize_t)alloc;
6333 }
6334
6335 self->stack->mark_set = 1;
6336 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6337
6338 return 0;
6339 }
6340
6341 static int
load_reduce(UnpicklerObject * self)6342 load_reduce(UnpicklerObject *self)
6343 {
6344 PyObject *callable = NULL;
6345 PyObject *argtup = NULL;
6346 PyObject *obj = NULL;
6347
6348 PDATA_POP(self->stack, argtup);
6349 if (argtup == NULL)
6350 return -1;
6351 PDATA_POP(self->stack, callable);
6352 if (callable) {
6353 obj = PyObject_CallObject(callable, argtup);
6354 Py_DECREF(callable);
6355 }
6356 Py_DECREF(argtup);
6357
6358 if (obj == NULL)
6359 return -1;
6360
6361 PDATA_PUSH(self->stack, obj, -1);
6362 return 0;
6363 }
6364
6365 /* Just raises an error if we don't know the protocol specified. PROTO
6366 * is the first opcode for protocols >= 2.
6367 */
6368 static int
load_proto(UnpicklerObject * self)6369 load_proto(UnpicklerObject *self)
6370 {
6371 char *s;
6372 int i;
6373
6374 if (_Unpickler_Read(self, &s, 1) < 0)
6375 return -1;
6376
6377 i = (unsigned char)s[0];
6378 if (i <= HIGHEST_PROTOCOL) {
6379 self->proto = i;
6380 return 0;
6381 }
6382
6383 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6384 return -1;
6385 }
6386
6387 static int
load_frame(UnpicklerObject * self)6388 load_frame(UnpicklerObject *self)
6389 {
6390 char *s;
6391 Py_ssize_t frame_len;
6392
6393 if (_Unpickler_Read(self, &s, 8) < 0)
6394 return -1;
6395
6396 frame_len = calc_binsize(s, 8);
6397 if (frame_len < 0) {
6398 PyErr_Format(PyExc_OverflowError,
6399 "FRAME length exceeds system's maximum of %zd bytes",
6400 PY_SSIZE_T_MAX);
6401 return -1;
6402 }
6403
6404 if (_Unpickler_Read(self, &s, frame_len) < 0)
6405 return -1;
6406
6407 /* Rewind to start of frame */
6408 self->next_read_idx -= frame_len;
6409 return 0;
6410 }
6411
6412 static PyObject *
load(UnpicklerObject * self)6413 load(UnpicklerObject *self)
6414 {
6415 PyObject *value = NULL;
6416 char *s = NULL;
6417
6418 self->num_marks = 0;
6419 self->stack->mark_set = 0;
6420 self->stack->fence = 0;
6421 self->proto = 0;
6422 if (Py_SIZE(self->stack))
6423 Pdata_clear(self->stack, 0);
6424
6425 /* Convenient macros for the dispatch while-switch loop just below. */
6426 #define OP(opcode, load_func) \
6427 case opcode: if (load_func(self) < 0) break; continue;
6428
6429 #define OP_ARG(opcode, load_func, arg) \
6430 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6431
6432 while (1) {
6433 if (_Unpickler_Read(self, &s, 1) < 0) {
6434 PickleState *st = _Pickle_GetGlobalState();
6435 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6436 PyErr_Format(PyExc_EOFError, "Ran out of input");
6437 }
6438 return NULL;
6439 }
6440
6441 switch ((enum opcode)s[0]) {
6442 OP(NONE, load_none)
6443 OP(BININT, load_binint)
6444 OP(BININT1, load_binint1)
6445 OP(BININT2, load_binint2)
6446 OP(INT, load_int)
6447 OP(LONG, load_long)
6448 OP_ARG(LONG1, load_counted_long, 1)
6449 OP_ARG(LONG4, load_counted_long, 4)
6450 OP(FLOAT, load_float)
6451 OP(BINFLOAT, load_binfloat)
6452 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6453 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6454 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6455 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6456 OP_ARG(BINSTRING, load_counted_binstring, 4)
6457 OP(STRING, load_string)
6458 OP(UNICODE, load_unicode)
6459 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6460 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6461 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6462 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6463 OP_ARG(TUPLE1, load_counted_tuple, 1)
6464 OP_ARG(TUPLE2, load_counted_tuple, 2)
6465 OP_ARG(TUPLE3, load_counted_tuple, 3)
6466 OP(TUPLE, load_tuple)
6467 OP(EMPTY_LIST, load_empty_list)
6468 OP(LIST, load_list)
6469 OP(EMPTY_DICT, load_empty_dict)
6470 OP(DICT, load_dict)
6471 OP(EMPTY_SET, load_empty_set)
6472 OP(ADDITEMS, load_additems)
6473 OP(FROZENSET, load_frozenset)
6474 OP(OBJ, load_obj)
6475 OP(INST, load_inst)
6476 OP(NEWOBJ, load_newobj)
6477 OP(NEWOBJ_EX, load_newobj_ex)
6478 OP(GLOBAL, load_global)
6479 OP(STACK_GLOBAL, load_stack_global)
6480 OP(APPEND, load_append)
6481 OP(APPENDS, load_appends)
6482 OP(BUILD, load_build)
6483 OP(DUP, load_dup)
6484 OP(BINGET, load_binget)
6485 OP(LONG_BINGET, load_long_binget)
6486 OP(GET, load_get)
6487 OP(MARK, load_mark)
6488 OP(BINPUT, load_binput)
6489 OP(LONG_BINPUT, load_long_binput)
6490 OP(PUT, load_put)
6491 OP(MEMOIZE, load_memoize)
6492 OP(POP, load_pop)
6493 OP(POP_MARK, load_pop_mark)
6494 OP(SETITEM, load_setitem)
6495 OP(SETITEMS, load_setitems)
6496 OP(PERSID, load_persid)
6497 OP(BINPERSID, load_binpersid)
6498 OP(REDUCE, load_reduce)
6499 OP(PROTO, load_proto)
6500 OP(FRAME, load_frame)
6501 OP_ARG(EXT1, load_extension, 1)
6502 OP_ARG(EXT2, load_extension, 2)
6503 OP_ARG(EXT4, load_extension, 4)
6504 OP_ARG(NEWTRUE, load_bool, Py_True)
6505 OP_ARG(NEWFALSE, load_bool, Py_False)
6506
6507 case STOP:
6508 break;
6509
6510 default:
6511 {
6512 PickleState *st = _Pickle_GetGlobalState();
6513 unsigned char c = (unsigned char) *s;
6514 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6515 PyErr_Format(st->UnpicklingError,
6516 "invalid load key, '%c'.", c);
6517 }
6518 else {
6519 PyErr_Format(st->UnpicklingError,
6520 "invalid load key, '\\x%02x'.", c);
6521 }
6522 return NULL;
6523 }
6524 }
6525
6526 break; /* and we are done! */
6527 }
6528
6529 if (PyErr_Occurred()) {
6530 return NULL;
6531 }
6532
6533 if (_Unpickler_SkipConsumed(self) < 0)
6534 return NULL;
6535
6536 PDATA_POP(self->stack, value);
6537 return value;
6538 }
6539
6540 /*[clinic input]
6541
6542 _pickle.Unpickler.load
6543
6544 Load a pickle.
6545
6546 Read a pickled object representation from the open file object given
6547 in the constructor, and return the reconstituted object hierarchy
6548 specified therein.
6549 [clinic start generated code]*/
6550
6551 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6552 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6553 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6554 {
6555 UnpicklerObject *unpickler = (UnpicklerObject*)self;
6556
6557 /* Check whether the Unpickler was initialized correctly. This prevents
6558 segfaulting if a subclass overridden __init__ with a function that does
6559 not call Unpickler.__init__(). Here, we simply ensure that self->read
6560 is not NULL. */
6561 if (unpickler->read == NULL) {
6562 PickleState *st = _Pickle_GetGlobalState();
6563 PyErr_Format(st->UnpicklingError,
6564 "Unpickler.__init__() was not called by %s.__init__()",
6565 Py_TYPE(unpickler)->tp_name);
6566 return NULL;
6567 }
6568
6569 return load(unpickler);
6570 }
6571
6572 /* The name of find_class() is misleading. In newer pickle protocols, this
6573 function is used for loading any global (i.e., functions), not just
6574 classes. The name is kept only for backward compatibility. */
6575
6576 /*[clinic input]
6577
6578 _pickle.Unpickler.find_class
6579
6580 module_name: object
6581 global_name: object
6582 /
6583
6584 Return an object from a specified module.
6585
6586 If necessary, the module will be imported. Subclasses may override
6587 this method (e.g. to restrict unpickling of arbitrary classes and
6588 functions).
6589
6590 This method is called whenever a class or a function object is
6591 needed. Both arguments passed are str objects.
6592 [clinic start generated code]*/
6593
6594 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)6595 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
6596 PyObject *module_name,
6597 PyObject *global_name)
6598 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
6599 {
6600 PyObject *global;
6601 PyObject *module;
6602
6603 /* Try to map the old names used in Python 2.x to the new ones used in
6604 Python 3.x. We do this only with old pickle protocols and when the
6605 user has not disabled the feature. */
6606 if (self->proto < 3 && self->fix_imports) {
6607 PyObject *key;
6608 PyObject *item;
6609 PickleState *st = _Pickle_GetGlobalState();
6610
6611 /* Check if the global (i.e., a function or a class) was renamed
6612 or moved to another module. */
6613 key = PyTuple_Pack(2, module_name, global_name);
6614 if (key == NULL)
6615 return NULL;
6616 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
6617 Py_DECREF(key);
6618 if (item) {
6619 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6620 PyErr_Format(PyExc_RuntimeError,
6621 "_compat_pickle.NAME_MAPPING values should be "
6622 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6623 return NULL;
6624 }
6625 module_name = PyTuple_GET_ITEM(item, 0);
6626 global_name = PyTuple_GET_ITEM(item, 1);
6627 if (!PyUnicode_Check(module_name) ||
6628 !PyUnicode_Check(global_name)) {
6629 PyErr_Format(PyExc_RuntimeError,
6630 "_compat_pickle.NAME_MAPPING values should be "
6631 "pairs of str, not (%.200s, %.200s)",
6632 Py_TYPE(module_name)->tp_name,
6633 Py_TYPE(global_name)->tp_name);
6634 return NULL;
6635 }
6636 }
6637 else if (PyErr_Occurred()) {
6638 return NULL;
6639 }
6640 else {
6641 /* Check if the module was renamed. */
6642 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
6643 if (item) {
6644 if (!PyUnicode_Check(item)) {
6645 PyErr_Format(PyExc_RuntimeError,
6646 "_compat_pickle.IMPORT_MAPPING values should be "
6647 "strings, not %.200s", Py_TYPE(item)->tp_name);
6648 return NULL;
6649 }
6650 module_name = item;
6651 }
6652 else if (PyErr_Occurred()) {
6653 return NULL;
6654 }
6655 }
6656 }
6657
6658 /*
6659 * we don't use PyImport_GetModule here, because it can return partially-
6660 * initialised modules, which then cause the getattribute to fail.
6661 */
6662 module = PyImport_Import(module_name);
6663 if (module == NULL) {
6664 return NULL;
6665 }
6666 global = getattribute(module, global_name, self->proto >= 4);
6667 Py_DECREF(module);
6668 return global;
6669 }
6670
6671 /*[clinic input]
6672
6673 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
6674
6675 Returns size in memory, in bytes.
6676 [clinic start generated code]*/
6677
6678 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)6679 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
6680 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
6681 {
6682 Py_ssize_t res;
6683
6684 res = _PyObject_SIZE(Py_TYPE(self));
6685 if (self->memo != NULL)
6686 res += self->memo_size * sizeof(PyObject *);
6687 if (self->marks != NULL)
6688 res += self->marks_size * sizeof(Py_ssize_t);
6689 if (self->input_line != NULL)
6690 res += strlen(self->input_line) + 1;
6691 if (self->encoding != NULL)
6692 res += strlen(self->encoding) + 1;
6693 if (self->errors != NULL)
6694 res += strlen(self->errors) + 1;
6695 return res;
6696 }
6697
6698 static struct PyMethodDef Unpickler_methods[] = {
6699 _PICKLE_UNPICKLER_LOAD_METHODDEF
6700 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
6701 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
6702 {NULL, NULL} /* sentinel */
6703 };
6704
6705 static void
Unpickler_dealloc(UnpicklerObject * self)6706 Unpickler_dealloc(UnpicklerObject *self)
6707 {
6708 PyObject_GC_UnTrack((PyObject *)self);
6709 Py_XDECREF(self->readline);
6710 Py_XDECREF(self->read);
6711 Py_XDECREF(self->peek);
6712 Py_XDECREF(self->stack);
6713 Py_XDECREF(self->pers_func);
6714 if (self->buffer.buf != NULL) {
6715 PyBuffer_Release(&self->buffer);
6716 self->buffer.buf = NULL;
6717 }
6718
6719 _Unpickler_MemoCleanup(self);
6720 PyMem_Free(self->marks);
6721 PyMem_Free(self->input_line);
6722 PyMem_Free(self->encoding);
6723 PyMem_Free(self->errors);
6724
6725 Py_TYPE(self)->tp_free((PyObject *)self);
6726 }
6727
6728 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)6729 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
6730 {
6731 Py_VISIT(self->readline);
6732 Py_VISIT(self->read);
6733 Py_VISIT(self->peek);
6734 Py_VISIT(self->stack);
6735 Py_VISIT(self->pers_func);
6736 return 0;
6737 }
6738
6739 static int
Unpickler_clear(UnpicklerObject * self)6740 Unpickler_clear(UnpicklerObject *self)
6741 {
6742 Py_CLEAR(self->readline);
6743 Py_CLEAR(self->read);
6744 Py_CLEAR(self->peek);
6745 Py_CLEAR(self->stack);
6746 Py_CLEAR(self->pers_func);
6747 if (self->buffer.buf != NULL) {
6748 PyBuffer_Release(&self->buffer);
6749 self->buffer.buf = NULL;
6750 }
6751
6752 _Unpickler_MemoCleanup(self);
6753 PyMem_Free(self->marks);
6754 self->marks = NULL;
6755 PyMem_Free(self->input_line);
6756 self->input_line = NULL;
6757 PyMem_Free(self->encoding);
6758 self->encoding = NULL;
6759 PyMem_Free(self->errors);
6760 self->errors = NULL;
6761
6762 return 0;
6763 }
6764
6765 /*[clinic input]
6766
6767 _pickle.Unpickler.__init__
6768
6769 file: object
6770 *
6771 fix_imports: bool = True
6772 encoding: str = 'ASCII'
6773 errors: str = 'strict'
6774
6775 This takes a binary file for reading a pickle data stream.
6776
6777 The protocol version of the pickle is detected automatically, so no
6778 protocol argument is needed. Bytes past the pickled object's
6779 representation are ignored.
6780
6781 The argument *file* must have two methods, a read() method that takes
6782 an integer argument, and a readline() method that requires no
6783 arguments. Both methods should return bytes. Thus *file* can be a
6784 binary file object opened for reading, an io.BytesIO object, or any
6785 other custom object that meets this interface.
6786
6787 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
6788 which are used to control compatibility support for pickle stream
6789 generated by Python 2. If *fix_imports* is True, pickle will try to
6790 map the old Python 2 names to the new names used in Python 3. The
6791 *encoding* and *errors* tell pickle how to decode 8-bit string
6792 instances pickled by Python 2; these default to 'ASCII' and 'strict',
6793 respectively. The *encoding* can be 'bytes' to read these 8-bit
6794 string instances as bytes objects.
6795 [clinic start generated code]*/
6796
6797 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors)6798 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
6799 int fix_imports, const char *encoding,
6800 const char *errors)
6801 /*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/
6802 {
6803 _Py_IDENTIFIER(persistent_load);
6804
6805 /* In case of multiple __init__() calls, clear previous content. */
6806 if (self->read != NULL)
6807 (void)Unpickler_clear(self);
6808
6809 if (_Unpickler_SetInputStream(self, file) < 0)
6810 return -1;
6811
6812 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
6813 return -1;
6814
6815 self->fix_imports = fix_imports;
6816
6817 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
6818 &self->pers_func, &self->pers_func_self) < 0)
6819 {
6820 return -1;
6821 }
6822
6823 self->stack = (Pdata *)Pdata_New();
6824 if (self->stack == NULL)
6825 return -1;
6826
6827 self->memo_size = 32;
6828 self->memo = _Unpickler_NewMemo(self->memo_size);
6829 if (self->memo == NULL)
6830 return -1;
6831
6832 self->proto = 0;
6833
6834 return 0;
6835 }
6836
6837
6838 /* Define a proxy object for the Unpickler's internal memo object. This is to
6839 * avoid breaking code like:
6840 * unpickler.memo.clear()
6841 * and
6842 * unpickler.memo = saved_memo
6843 * Is this a good idea? Not really, but we don't want to break code that uses
6844 * it. Note that we don't implement the entire mapping API here. This is
6845 * intentional, as these should be treated as black-box implementation details.
6846 *
6847 * We do, however, have to implement pickling/unpickling support because of
6848 * real-world code like cvs2svn.
6849 */
6850
6851 /*[clinic input]
6852 _pickle.UnpicklerMemoProxy.clear
6853
6854 Remove all items from memo.
6855 [clinic start generated code]*/
6856
6857 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)6858 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
6859 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
6860 {
6861 _Unpickler_MemoCleanup(self->unpickler);
6862 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6863 if (self->unpickler->memo == NULL)
6864 return NULL;
6865 Py_RETURN_NONE;
6866 }
6867
6868 /*[clinic input]
6869 _pickle.UnpicklerMemoProxy.copy
6870
6871 Copy the memo to a new object.
6872 [clinic start generated code]*/
6873
6874 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)6875 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
6876 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
6877 {
6878 size_t i;
6879 PyObject *new_memo = PyDict_New();
6880 if (new_memo == NULL)
6881 return NULL;
6882
6883 for (i = 0; i < self->unpickler->memo_size; i++) {
6884 int status;
6885 PyObject *key, *value;
6886
6887 value = self->unpickler->memo[i];
6888 if (value == NULL)
6889 continue;
6890
6891 key = PyLong_FromSsize_t(i);
6892 if (key == NULL)
6893 goto error;
6894 status = PyDict_SetItem(new_memo, key, value);
6895 Py_DECREF(key);
6896 if (status < 0)
6897 goto error;
6898 }
6899 return new_memo;
6900
6901 error:
6902 Py_DECREF(new_memo);
6903 return NULL;
6904 }
6905
6906 /*[clinic input]
6907 _pickle.UnpicklerMemoProxy.__reduce__
6908
6909 Implement pickling support.
6910 [clinic start generated code]*/
6911
6912 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)6913 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
6914 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
6915 {
6916 PyObject *reduce_value;
6917 PyObject *constructor_args;
6918 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
6919 if (contents == NULL)
6920 return NULL;
6921
6922 reduce_value = PyTuple_New(2);
6923 if (reduce_value == NULL) {
6924 Py_DECREF(contents);
6925 return NULL;
6926 }
6927 constructor_args = PyTuple_New(1);
6928 if (constructor_args == NULL) {
6929 Py_DECREF(contents);
6930 Py_DECREF(reduce_value);
6931 return NULL;
6932 }
6933 PyTuple_SET_ITEM(constructor_args, 0, contents);
6934 Py_INCREF((PyObject *)&PyDict_Type);
6935 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6936 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6937 return reduce_value;
6938 }
6939
/* Method table installed as tp_methods of UnpicklerMemoProxyType.  Each
   entry is a *_METHODDEF macro defined outside this section (presumably by
   the Argument Clinic generated header) for clear(), copy() and
   __reduce__(). */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
6946
6947 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)6948 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
6949 {
6950 PyObject_GC_UnTrack(self);
6951 Py_XDECREF(self->unpickler);
6952 PyObject_GC_Del((PyObject *)self);
6953 }
6954
6955 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)6956 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
6957 visitproc visit, void *arg)
6958 {
6959 Py_VISIT(self->unpickler);
6960 return 0;
6961 }
6962
6963 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)6964 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
6965 {
6966 Py_CLEAR(self->unpickler);
6967 return 0;
6968 }
6969
/* Type object for the proxy created by UnpicklerMemoProxy_New().  Instances
   are unhashable (tp_hash is PyObject_HashNotImplemented), take part in
   cyclic garbage collection, and expose only the methods listed in
   unpicklerproxy_methods.  Slots after tp_methods are not listed and are
   therefore zero-initialised. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /* tp_name */
    sizeof(UnpicklerMemoProxyObject),           /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,  /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
7000
7001 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7002 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7003 {
7004 UnpicklerMemoProxyObject *self;
7005
7006 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7007 &UnpicklerMemoProxyType);
7008 if (self == NULL)
7009 return NULL;
7010 Py_INCREF(unpickler);
7011 self->unpickler = unpickler;
7012 PyObject_GC_Track(self);
7013 return (PyObject *)self;
7014 }
7015
7016 /*****************************************************************************/
7017
7018
7019 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7020 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7021 {
7022 return UnpicklerMemoProxy_New(self);
7023 }
7024
7025 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7026 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7027 {
7028 PyObject **new_memo;
7029 size_t new_memo_size = 0;
7030
7031 if (obj == NULL) {
7032 PyErr_SetString(PyExc_TypeError,
7033 "attribute deletion is not supported");
7034 return -1;
7035 }
7036
7037 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7038 UnpicklerObject *unpickler =
7039 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7040
7041 new_memo_size = unpickler->memo_size;
7042 new_memo = _Unpickler_NewMemo(new_memo_size);
7043 if (new_memo == NULL)
7044 return -1;
7045
7046 for (size_t i = 0; i < new_memo_size; i++) {
7047 Py_XINCREF(unpickler->memo[i]);
7048 new_memo[i] = unpickler->memo[i];
7049 }
7050 }
7051 else if (PyDict_Check(obj)) {
7052 Py_ssize_t i = 0;
7053 PyObject *key, *value;
7054
7055 new_memo_size = PyDict_GET_SIZE(obj);
7056 new_memo = _Unpickler_NewMemo(new_memo_size);
7057 if (new_memo == NULL)
7058 return -1;
7059
7060 while (PyDict_Next(obj, &i, &key, &value)) {
7061 Py_ssize_t idx;
7062 if (!PyLong_Check(key)) {
7063 PyErr_SetString(PyExc_TypeError,
7064 "memo key must be integers");
7065 goto error;
7066 }
7067 idx = PyLong_AsSsize_t(key);
7068 if (idx == -1 && PyErr_Occurred())
7069 goto error;
7070 if (idx < 0) {
7071 PyErr_SetString(PyExc_ValueError,
7072 "memo key must be positive integers.");
7073 goto error;
7074 }
7075 if (_Unpickler_MemoPut(self, idx, value) < 0)
7076 goto error;
7077 }
7078 }
7079 else {
7080 PyErr_Format(PyExc_TypeError,
7081 "'memo' attribute must be an UnpicklerMemoProxy object "
7082 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7083 return -1;
7084 }
7085
7086 _Unpickler_MemoCleanup(self);
7087 self->memo_size = new_memo_size;
7088 self->memo = new_memo;
7089
7090 return 0;
7091
7092 error:
7093 if (new_memo_size) {
7094 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7095 Py_XDECREF(new_memo[i]);
7096 }
7097 PyMem_FREE(new_memo);
7098 }
7099 return -1;
7100 }
7101
7102 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7103 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7104 {
7105 if (self->pers_func == NULL) {
7106 PyErr_SetString(PyExc_AttributeError, "persistent_load");
7107 return NULL;
7108 }
7109 return reconstruct_method(self->pers_func, self->pers_func_self);
7110 }
7111
7112 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7113 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7114 {
7115 if (value == NULL) {
7116 PyErr_SetString(PyExc_TypeError,
7117 "attribute deletion is not supported");
7118 return -1;
7119 }
7120 if (!PyCallable_Check(value)) {
7121 PyErr_SetString(PyExc_TypeError,
7122 "persistent_load must be a callable taking "
7123 "one argument");
7124 return -1;
7125 }
7126
7127 self->pers_func_self = NULL;
7128 Py_INCREF(value);
7129 Py_XSETREF(self->pers_func, value);
7130
7131 return 0;
7132 }
7133
/* Computed attributes of Unpickler, installed as tp_getset of
   Unpickler_Type: `memo` and `persistent_load`.  Both setters reject
   attribute deletion. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}      /* sentinel */
};
7140
/* Type object for _pickle.Unpickler.  The type is subclassable and takes
   part in cyclic garbage collection; instances are allocated with
   PyType_GenericAlloc / PyType_GenericNew, initialised by the clinic
   generated _pickle_Unpickler___init__ wrapper and freed with
   PyObject_GC_Del. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,  /*tp_flags*/
    _pickle_Unpickler___init____doc__,  /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Unpickler___init__,         /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
7183
7184 /*[clinic input]
7185
7186 _pickle.dump
7187
7188 obj: object
7189 file: object
7190 protocol: object = NULL
7191 *
7192 fix_imports: bool = True
7193
7194 Write a pickled representation of obj to the open file object file.
7195
7196 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7197 be more efficient.
7198
7199 The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3 and 4. The default
7201 protocol is 3; a backward-incompatible protocol designed for Python 3.
7202
7203 Specifying a negative protocol version selects the highest protocol
7204 version supported. The higher the protocol used, the more recent the
7205 version of Python needed to read the pickle produced.
7206
7207 The *file* argument must have a write() method that accepts a single
7208 bytes argument. It can thus be a file object opened for binary
7209 writing, an io.BytesIO instance, or any other custom object that meets
7210 this interface.
7211
7212 If *fix_imports* is True and protocol is less than 3, pickle will try
7213 to map the new Python 3 names to the old module names used in Python
7214 2, so that the pickle data stream is readable with Python 2.
7215 [clinic start generated code]*/
7216
7217 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports)7218 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7219 PyObject *protocol, int fix_imports)
7220 /*[clinic end generated code: output=a4774d5fde7d34de input=830f8a64cef6f042]*/
7221 {
7222 PicklerObject *pickler = _Pickler_New();
7223
7224 if (pickler == NULL)
7225 return NULL;
7226
7227 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7228 goto error;
7229
7230 if (_Pickler_SetOutputStream(pickler, file) < 0)
7231 goto error;
7232
7233 if (dump(pickler, obj) < 0)
7234 goto error;
7235
7236 if (_Pickler_FlushToFile(pickler) < 0)
7237 goto error;
7238
7239 Py_DECREF(pickler);
7240 Py_RETURN_NONE;
7241
7242 error:
7243 Py_XDECREF(pickler);
7244 return NULL;
7245 }
7246
7247 /*[clinic input]
7248
7249 _pickle.dumps
7250
7251 obj: object
7252 protocol: object = NULL
7253 *
7254 fix_imports: bool = True
7255
7256 Return the pickled representation of the object as a bytes object.
7257
7258 The optional *protocol* argument tells the pickler to use the given
7259 protocol; supported protocols are 0, 1, 2, 3 and 4. The default
7260 protocol is 3; a backward-incompatible protocol designed for Python 3.
7261
7262 Specifying a negative protocol version selects the highest protocol
7263 version supported. The higher the protocol used, the more recent the
7264 version of Python needed to read the pickle produced.
7265
7266 If *fix_imports* is True and *protocol* is less than 3, pickle will
7267 try to map the new Python 3 names to the old module names used in
7268 Python 2, so that the pickle data stream is readable with Python 2.
7269 [clinic start generated code]*/
7270
7271 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports)7272 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7273 int fix_imports)
7274 /*[clinic end generated code: output=d75d5cda456fd261 input=293dbeda181580b7]*/
7275 {
7276 PyObject *result;
7277 PicklerObject *pickler = _Pickler_New();
7278
7279 if (pickler == NULL)
7280 return NULL;
7281
7282 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7283 goto error;
7284
7285 if (dump(pickler, obj) < 0)
7286 goto error;
7287
7288 result = _Pickler_GetString(pickler);
7289 Py_DECREF(pickler);
7290 return result;
7291
7292 error:
7293 Py_XDECREF(pickler);
7294 return NULL;
7295 }
7296
7297 /*[clinic input]
7298
7299 _pickle.load
7300
7301 file: object
7302 *
7303 fix_imports: bool = True
7304 encoding: str = 'ASCII'
7305 errors: str = 'strict'
7306
7307 Read and return an object from the pickle data stored in a file.
7308
7309 This is equivalent to ``Unpickler(file).load()``, but may be more
7310 efficient.
7311
7312 The protocol version of the pickle is detected automatically, so no
7313 protocol argument is needed. Bytes past the pickled object's
7314 representation are ignored.
7315
7316 The argument *file* must have two methods, a read() method that takes
7317 an integer argument, and a readline() method that requires no
7318 arguments. Both methods should return bytes. Thus *file* can be a
7319 binary file object opened for reading, an io.BytesIO object, or any
7320 other custom object that meets this interface.
7321
7322 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7323 which are used to control compatibility support for pickle stream
7324 generated by Python 2. If *fix_imports* is True, pickle will try to
7325 map the old Python 2 names to the new names used in Python 3. The
7326 *encoding* and *errors* tell pickle how to decode 8-bit string
7327 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7328 respectively. The *encoding* can be 'bytes' to read these 8-bit
7329 string instances as bytes objects.
7330 [clinic start generated code]*/
7331
7332 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors)7333 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7334 const char *encoding, const char *errors)
7335 /*[clinic end generated code: output=69e298160285199e input=01b44dd3fc07afa7]*/
7336 {
7337 PyObject *result;
7338 UnpicklerObject *unpickler = _Unpickler_New();
7339
7340 if (unpickler == NULL)
7341 return NULL;
7342
7343 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7344 goto error;
7345
7346 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7347 goto error;
7348
7349 unpickler->fix_imports = fix_imports;
7350
7351 result = load(unpickler);
7352 Py_DECREF(unpickler);
7353 return result;
7354
7355 error:
7356 Py_XDECREF(unpickler);
7357 return NULL;
7358 }
7359
7360 /*[clinic input]
7361
7362 _pickle.loads
7363
7364 data: object
7365 *
7366 fix_imports: bool = True
7367 encoding: str = 'ASCII'
7368 errors: str = 'strict'
7369
7370 Read and return an object from the given pickle data.
7371
7372 The protocol version of the pickle is detected automatically, so no
7373 protocol argument is needed. Bytes past the pickled object's
7374 representation are ignored.
7375
7376 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7377 which are used to control compatibility support for pickle stream
7378 generated by Python 2. If *fix_imports* is True, pickle will try to
7379 map the old Python 2 names to the new names used in Python 3. The
7380 *encoding* and *errors* tell pickle how to decode 8-bit string
7381 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7382 respectively. The *encoding* can be 'bytes' to read these 8-bit
7383 string instances as bytes objects.
7384 [clinic start generated code]*/
7385
7386 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors)7387 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7388 const char *encoding, const char *errors)
7389 /*[clinic end generated code: output=1e7cb2343f2c440f input=70605948a719feb9]*/
7390 {
7391 PyObject *result;
7392 UnpicklerObject *unpickler = _Unpickler_New();
7393
7394 if (unpickler == NULL)
7395 return NULL;
7396
7397 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7398 goto error;
7399
7400 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7401 goto error;
7402
7403 unpickler->fix_imports = fix_imports;
7404
7405 result = load(unpickler);
7406 Py_DECREF(unpickler);
7407 return result;
7408
7409 error:
7410 Py_XDECREF(unpickler);
7411 return NULL;
7412 }
7413
/* Module-level function table referenced by _picklemodule.  Each entry is
   a *_METHODDEF macro defined outside this section (presumably by the
   Argument Clinic generated header) for dump(), dumps(), load() and
   loads(). */
static struct PyMethodDef pickle_methods[] = {
    _PICKLE_DUMP_METHODDEF
    _PICKLE_DUMPS_METHODDEF
    _PICKLE_LOAD_METHODDEF
    _PICKLE_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
7421
7422 static int
pickle_clear(PyObject * m)7423 pickle_clear(PyObject *m)
7424 {
7425 _Pickle_ClearState(_Pickle_GetState(m));
7426 return 0;
7427 }
7428
7429 static void
pickle_free(PyObject * m)7430 pickle_free(PyObject *m)
7431 {
7432 _Pickle_ClearState(_Pickle_GetState(m));
7433 }
7434
7435 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7436 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7437 {
7438 PickleState *st = _Pickle_GetState(m);
7439 Py_VISIT(st->PickleError);
7440 Py_VISIT(st->PicklingError);
7441 Py_VISIT(st->UnpicklingError);
7442 Py_VISIT(st->dispatch_table);
7443 Py_VISIT(st->extension_registry);
7444 Py_VISIT(st->extension_cache);
7445 Py_VISIT(st->inverted_registry);
7446 Py_VISIT(st->name_mapping_2to3);
7447 Py_VISIT(st->import_mapping_2to3);
7448 Py_VISIT(st->name_mapping_3to2);
7449 Py_VISIT(st->import_mapping_3to2);
7450 Py_VISIT(st->codecs_encode);
7451 Py_VISIT(st->getattr);
7452 Py_VISIT(st->partial);
7453 return 0;
7454 }
7455
/* Module definition for _pickle.  The module carries a PickleState as its
   per-module state; pickle_traverse, pickle_clear and pickle_free are the
   GC and teardown hooks operating on that state. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    sizeof(PickleState),  /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_reload */
    pickle_traverse,      /* m_traverse */
    pickle_clear,         /* m_clear */
    (freefunc)pickle_free /* m_free */
};
7467
7468 PyMODINIT_FUNC
PyInit__pickle(void)7469 PyInit__pickle(void)
7470 {
7471 PyObject *m;
7472 PickleState *st;
7473
7474 m = PyState_FindModule(&_picklemodule);
7475 if (m) {
7476 Py_INCREF(m);
7477 return m;
7478 }
7479
7480 if (PyType_Ready(&Unpickler_Type) < 0)
7481 return NULL;
7482 if (PyType_Ready(&Pickler_Type) < 0)
7483 return NULL;
7484 if (PyType_Ready(&Pdata_Type) < 0)
7485 return NULL;
7486 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7487 return NULL;
7488 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7489 return NULL;
7490
7491 /* Create the module and add the functions. */
7492 m = PyModule_Create(&_picklemodule);
7493 if (m == NULL)
7494 return NULL;
7495
7496 Py_INCREF(&Pickler_Type);
7497 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7498 return NULL;
7499 Py_INCREF(&Unpickler_Type);
7500 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7501 return NULL;
7502
7503 st = _Pickle_GetState(m);
7504
7505 /* Initialize the exceptions. */
7506 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7507 if (st->PickleError == NULL)
7508 return NULL;
7509 st->PicklingError = \
7510 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7511 if (st->PicklingError == NULL)
7512 return NULL;
7513 st->UnpicklingError = \
7514 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7515 if (st->UnpicklingError == NULL)
7516 return NULL;
7517
7518 Py_INCREF(st->PickleError);
7519 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
7520 return NULL;
7521 Py_INCREF(st->PicklingError);
7522 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
7523 return NULL;
7524 Py_INCREF(st->UnpicklingError);
7525 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
7526 return NULL;
7527
7528 if (_Pickle_InitState(st) < 0)
7529 return NULL;
7530
7531 return m;
7532 }
7533