/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 # error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10
11 #include "Python.h"
12 #include "structmember.h"
13
/* Documentation string for the _pickle module, declared through the
   PyDoc_STRVAR() helper under the name pickle_module_doc. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
16
17 /*[clinic input]
18 module _pickle
19 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
25
/* Pickle protocol version limits.

   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,   /* newest protocol this file defines opcodes for */
    DEFAULT_PROTOCOL = 4    /* protocol selected when none is requested --
                               NOTE(review): the selection logic is outside
                               this view */
};
33
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each enumerator's value is the single character that identifies the opcode
   (for example, _Pickler_CommitFrame() stores FRAME as the first byte of a
   frame header).  The enumerators are grouped by the protocol version that
   introduced them; the first, unlabelled group predates protocol 2. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
114
/* Tuning constants shared by the pickler and the unpickler. */
enum {
   /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
      batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
      break if this gets out of synch with pickle.py, but it's unclear that would
      help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* Frames whose payload is shorter than this are emitted without a frame
       header (see _Pickler_CommitFrame()). */
    FRAME_SIZE_MIN = 4,
    /* Payload size at which _Pickler_OpcodeBoundary() commits the current
       frame. */
    FRAME_SIZE_TARGET = 64 * 1024,
    /* Bytes reserved for a frame header: the FRAME opcode byte followed by
       an 8-byte length (see _Pickler_CommitFrame()). */
    FRAME_HEADER_SIZE = 9
};
136
137 /*************************************************************************/
138
/* State of the pickle module, per PEP 3121.

   Every member is a PyObject pointer that _Pickle_ClearState() releases with
   Py_CLEAR().  _Pickle_InitState() fills all members except the three
   exception classes. NOTE(review): the exception classes are presumably set
   during module initialisation, outside this view. */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;
} PickleState;
177
/* Forward declaration of the _pickle module definition.  It is needed here
   because _Pickle_GetGlobalState() passes its address to
   PyState_FindModule(); the definition itself is outside this view. */
static struct PyModuleDef _picklemodule;
180
181 /* Given a module object, get its per-module state. */
182 static PickleState *
_Pickle_GetState(PyObject * module)183 _Pickle_GetState(PyObject *module)
184 {
185 return (PickleState *)PyModule_GetState(module);
186 }
187
188 /* Find the module instance imported in the currently running sub-interpreter
189 and get its state. */
190 static PickleState *
_Pickle_GetGlobalState(void)191 _Pickle_GetGlobalState(void)
192 {
193 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194 }
195
196 /* Clear the given pickle module state. */
197 static void
_Pickle_ClearState(PickleState * st)198 _Pickle_ClearState(PickleState *st)
199 {
200 Py_CLEAR(st->PickleError);
201 Py_CLEAR(st->PicklingError);
202 Py_CLEAR(st->UnpicklingError);
203 Py_CLEAR(st->dispatch_table);
204 Py_CLEAR(st->extension_registry);
205 Py_CLEAR(st->extension_cache);
206 Py_CLEAR(st->inverted_registry);
207 Py_CLEAR(st->name_mapping_2to3);
208 Py_CLEAR(st->import_mapping_2to3);
209 Py_CLEAR(st->name_mapping_3to2);
210 Py_CLEAR(st->import_mapping_3to2);
211 Py_CLEAR(st->codecs_encode);
212 Py_CLEAR(st->getattr);
213 Py_CLEAR(st->partial);
214 }
215
216 /* Initialize the given pickle module state. */
217 static int
_Pickle_InitState(PickleState * st)218 _Pickle_InitState(PickleState *st)
219 {
220 PyObject *copyreg = NULL;
221 PyObject *compat_pickle = NULL;
222 PyObject *codecs = NULL;
223 PyObject *functools = NULL;
224 _Py_IDENTIFIER(getattr);
225
226 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
227 if (st->getattr == NULL)
228 goto error;
229
230 copyreg = PyImport_ImportModule("copyreg");
231 if (!copyreg)
232 goto error;
233 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234 if (!st->dispatch_table)
235 goto error;
236 if (!PyDict_CheckExact(st->dispatch_table)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg.dispatch_table should be a dict, not %.200s",
239 Py_TYPE(st->dispatch_table)->tp_name);
240 goto error;
241 }
242 st->extension_registry = \
243 PyObject_GetAttrString(copyreg, "_extension_registry");
244 if (!st->extension_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->extension_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._extension_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250 goto error;
251 }
252 st->inverted_registry = \
253 PyObject_GetAttrString(copyreg, "_inverted_registry");
254 if (!st->inverted_registry)
255 goto error;
256 if (!PyDict_CheckExact(st->inverted_registry)) {
257 PyErr_Format(PyExc_RuntimeError,
258 "copyreg._inverted_registry should be a dict, "
259 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260 goto error;
261 }
262 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263 if (!st->extension_cache)
264 goto error;
265 if (!PyDict_CheckExact(st->extension_cache)) {
266 PyErr_Format(PyExc_RuntimeError,
267 "copyreg._extension_cache should be a dict, "
268 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269 goto error;
270 }
271 Py_CLEAR(copyreg);
272
273 /* Load the 2.x -> 3.x stdlib module mapping tables */
274 compat_pickle = PyImport_ImportModule("_compat_pickle");
275 if (!compat_pickle)
276 goto error;
277 st->name_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279 if (!st->name_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284 Py_TYPE(st->name_mapping_2to3)->tp_name);
285 goto error;
286 }
287 st->import_mapping_2to3 = \
288 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289 if (!st->import_mapping_2to3)
290 goto error;
291 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292 PyErr_Format(PyExc_RuntimeError,
293 "_compat_pickle.IMPORT_MAPPING should be a dict, "
294 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295 goto error;
296 }
297 /* ... and the 3.x -> 2.x mapping tables */
298 st->name_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300 if (!st->name_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306 goto error;
307 }
308 st->import_mapping_3to2 = \
309 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310 if (!st->import_mapping_3to2)
311 goto error;
312 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313 PyErr_Format(PyExc_RuntimeError,
314 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316 goto error;
317 }
318 Py_CLEAR(compat_pickle);
319
320 codecs = PyImport_ImportModule("codecs");
321 if (codecs == NULL)
322 goto error;
323 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324 if (st->codecs_encode == NULL) {
325 goto error;
326 }
327 if (!PyCallable_Check(st->codecs_encode)) {
328 PyErr_Format(PyExc_RuntimeError,
329 "codecs.encode should be a callable, not %.200s",
330 Py_TYPE(st->codecs_encode)->tp_name);
331 goto error;
332 }
333 Py_CLEAR(codecs);
334
335 functools = PyImport_ImportModule("functools");
336 if (!functools)
337 goto error;
338 st->partial = PyObject_GetAttrString(functools, "partial");
339 if (!st->partial)
340 goto error;
341 Py_CLEAR(functools);
342
343 return 0;
344
345 error:
346 Py_CLEAR(copyreg);
347 Py_CLEAR(compat_pickle);
348 Py_CLEAR(codecs);
349 Py_CLEAR(functools);
350 _Pickle_ClearState(st);
351 return -1;
352 }
353
354 /* Helper for calling a function with a single argument quickly.
355
356 This function steals the reference of the given argument. */
357 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)358 _Pickle_FastCall(PyObject *func, PyObject *obj)
359 {
360 PyObject *result;
361
362 result = PyObject_CallFunctionObjArgs(func, obj, NULL);
363 Py_DECREF(obj);
364 return result;
365 }
366
367 /*************************************************************************/
368
369 /* Retrieve and deconstruct a method for avoiding a reference cycle
370 (pickler -> bound method of pickler -> pickler) */
371 static int
init_method_ref(PyObject * self,_Py_Identifier * name,PyObject ** method_func,PyObject ** method_self)372 init_method_ref(PyObject *self, _Py_Identifier *name,
373 PyObject **method_func, PyObject **method_self)
374 {
375 PyObject *func, *func2;
376 int ret;
377
378 /* *method_func and *method_self should be consistent. All refcount decrements
379 should be occurred after setting *method_self and *method_func. */
380 ret = _PyObject_LookupAttrId(self, name, &func);
381 if (func == NULL) {
382 *method_self = NULL;
383 Py_CLEAR(*method_func);
384 return ret;
385 }
386
387 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388 /* Deconstruct a bound Python method */
389 func2 = PyMethod_GET_FUNCTION(func);
390 Py_INCREF(func2);
391 *method_self = self; /* borrowed */
392 Py_XSETREF(*method_func, func2);
393 Py_DECREF(func);
394 return 0;
395 }
396 else {
397 *method_self = NULL;
398 Py_XSETREF(*method_func, func);
399 return 0;
400 }
401 }
402
403 /* Bind a method if it was deconstructed */
404 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)405 reconstruct_method(PyObject *func, PyObject *self)
406 {
407 if (self) {
408 return PyMethod_New(func, self);
409 }
410 else {
411 Py_INCREF(func);
412 return func;
413 }
414 }
415
416 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)417 call_method(PyObject *func, PyObject *self, PyObject *obj)
418 {
419 if (self) {
420 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421 }
422 else {
423 return PyObject_CallFunctionObjArgs(func, obj, NULL);
424 }
425 }
426
427 /*************************************************************************/
428
/* Internal data type used as the unpickling stack.

   Py_SIZE() of the object is the number of items currently on the stack;
   `data` holds one owned reference per item (Pdata_dealloc() releases
   them). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;        /* heap array holding the stack items */
    int mark_set;           /* is MARK set? */
    Py_ssize_t fence;       /* position of top MARK or 0 */
    Py_ssize_t allocated;   /* number of slots in data allocated */
} Pdata;
437
438 static void
Pdata_dealloc(Pdata * self)439 Pdata_dealloc(Pdata *self)
440 {
441 Py_ssize_t i = Py_SIZE(self);
442 while (--i >= 0) {
443 Py_DECREF(self->data[i]);
444 }
445 PyMem_FREE(self->data);
446 PyObject_Del(self);
447 }
448
/* Type object for Pdata.  Only the name, sizes and destructor are given
   here; every remaining slot is left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    sizeof(PyObject *),           /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
456
457 static PyObject *
Pdata_New(void)458 Pdata_New(void)
459 {
460 Pdata *self;
461
462 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463 return NULL;
464 Py_SIZE(self) = 0;
465 self->mark_set = 0;
466 self->fence = 0;
467 self->allocated = 8;
468 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
469 if (self->data)
470 return (PyObject *)self;
471 Py_DECREF(self);
472 return PyErr_NoMemory();
473 }
474
475
476 /* Retain only the initial clearto items. If clearto >= the current
477 * number of items, this is a (non-erroneous) NOP.
478 */
479 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)480 Pdata_clear(Pdata *self, Py_ssize_t clearto)
481 {
482 Py_ssize_t i = Py_SIZE(self);
483
484 assert(clearto >= self->fence);
485 if (clearto >= i)
486 return 0;
487
488 while (--i >= clearto) {
489 Py_CLEAR(self->data[i]);
490 }
491 Py_SIZE(self) = clearto;
492 return 0;
493 }
494
495 static int
Pdata_grow(Pdata * self)496 Pdata_grow(Pdata *self)
497 {
498 PyObject **data = self->data;
499 size_t allocated = (size_t)self->allocated;
500 size_t new_allocated;
501
502 new_allocated = (allocated >> 3) + 6;
503 /* check for integer overflow */
504 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
505 goto nomemory;
506 new_allocated += allocated;
507 PyMem_RESIZE(data, PyObject *, new_allocated);
508 if (data == NULL)
509 goto nomemory;
510
511 self->data = data;
512 self->allocated = (Py_ssize_t)new_allocated;
513 return 0;
514
515 nomemory:
516 PyErr_NoMemory();
517 return -1;
518 }
519
520 static int
Pdata_stack_underflow(Pdata * self)521 Pdata_stack_underflow(Pdata *self)
522 {
523 PickleState *st = _Pickle_GetGlobalState();
524 PyErr_SetString(st->UnpicklingError,
525 self->mark_set ?
526 "unexpected MARK found" :
527 "unpickling stack underflow");
528 return -1;
529 }
530
531 /* D is a Pdata*. Pop the topmost element and store it into V, which
532 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
533 * is raised and V is set to NULL.
534 */
535 static PyObject *
Pdata_pop(Pdata * self)536 Pdata_pop(Pdata *self)
537 {
538 if (Py_SIZE(self) <= self->fence) {
539 Pdata_stack_underflow(self);
540 return NULL;
541 }
542 return self->data[--Py_SIZE(self)];
543 }
544 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
545
546 static int
Pdata_push(Pdata * self,PyObject * obj)547 Pdata_push(Pdata *self, PyObject *obj)
548 {
549 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
550 return -1;
551 }
552 self->data[Py_SIZE(self)++] = obj;
553 return 0;
554 }
555
/* Push an object on stack, transferring its ownership to the stack.
   On failure the macro makes the *enclosing function* return ER.
   NOTE(review): on that failure path the macro does not release O. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   O is evaluated twice, so it must be free of side effects.  On failure the
   macro makes the *enclosing function* return ER.
   NOTE(review): the reference added here is not dropped on that path. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
564
565 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)566 Pdata_poptuple(Pdata *self, Py_ssize_t start)
567 {
568 PyObject *tuple;
569 Py_ssize_t len, i, j;
570
571 if (start < self->fence) {
572 Pdata_stack_underflow(self);
573 return NULL;
574 }
575 len = Py_SIZE(self) - start;
576 tuple = PyTuple_New(len);
577 if (tuple == NULL)
578 return NULL;
579 for (i = start, j = 0; j < len; i++, j++)
580 PyTuple_SET_ITEM(tuple, j, self->data[i]);
581
582 Py_SIZE(self) = start;
583 return tuple;
584 }
585
586 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)587 Pdata_poplist(Pdata *self, Py_ssize_t start)
588 {
589 PyObject *list;
590 Py_ssize_t len, i, j;
591
592 len = Py_SIZE(self) - start;
593 list = PyList_New(len);
594 if (list == NULL)
595 return NULL;
596 for (i = start, j = 0; j < len; i++, j++)
597 PyList_SET_ITEM(list, j, self->data[i]);
598
599 Py_SIZE(self) = start;
600 return list;
601 }
602
/* One slot of the pickler memo hashtable. */
typedef struct {
    PyObject *me_key;       /* memoized object (reference owned by the table,
                               see PyMemoTable_Set()); NULL marks an empty
                               slot */
    Py_ssize_t me_value;    /* integer associated with me_key */
} PyMemoEntry;
607
/* Open-addressing hashtable keyed by object identity, used as the pickler
   memo. */
typedef struct {
    size_t mt_mask;         /* mt_allocated - 1; masks a probe index into
                               the table */
    size_t mt_used;         /* number of occupied slots */
    size_t mt_allocated;    /* number of slots in mt_table (a power of two) */
    PyMemoEntry *mt_table;  /* heap array of mt_allocated slots */
} PyMemoTable;
614
/* Instance layout of _pickle.Pickler. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keep track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *reducer_override; /* hook for invoking user-defined callbacks
                                   instead of save_global when pickling
                                   functions and classes */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytes buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Number of bytes of output_buffer in use. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* NOTE(review): presumably the current
                                   nesting depth in fast mode, compared with
                                   FAST_NESTING_LIMIT -- confirm */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;        /* NOTE(review): presumably tracks objects
                                   being saved in fast mode once the nesting
                                   limit is exceeded -- confirm */
    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
} PicklerObject;
653
/* Instance layout of _pickle.Unpickler. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */

    /* Input state.  NOTE(review): how these members relate to each other is
       defined by code outside this view. */
    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readinto;         /* readinto() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */
    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value
                                   is "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
695
/* Instance layout of _pickle.PicklerMemoProxy: an object that refers to a
   Pickler in order to expose its memo table. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
700
/* Instance layout of _pickle.UnpicklerMemoProxy, the Unpickler counterpart
   of PicklerMemoProxyObject. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Unpickler whose memo we're proxying --
                                   NOTE(review): by analogy with
                                   PicklerMemoProxyObject; confirm */
} UnpicklerMemoProxyObject;
705
/* Forward declarations of functions and type objects that are defined later
   in the file but referenced before their definition (for example,
   _Pickler_New() uses Pickler_Type). */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
711
712 #include "clinic/_pickle.c.h"
713
/*************************************************************************
 A custom hashtable mapping void* to Python ints. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created table and the smallest size the
   table is ever resized to.  Must be a power of two because
   mt_mask = size - 1 is used to mask probe indices. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right by on every probe
   step in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
722
723
724 static PyMemoTable *
PyMemoTable_New(void)725 PyMemoTable_New(void)
726 {
727 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
728 if (memo == NULL) {
729 PyErr_NoMemory();
730 return NULL;
731 }
732
733 memo->mt_used = 0;
734 memo->mt_allocated = MT_MINSIZE;
735 memo->mt_mask = MT_MINSIZE - 1;
736 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
737 if (memo->mt_table == NULL) {
738 PyMem_FREE(memo);
739 PyErr_NoMemory();
740 return NULL;
741 }
742 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
743
744 return memo;
745 }
746
747 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)748 PyMemoTable_Copy(PyMemoTable *self)
749 {
750 PyMemoTable *new = PyMemoTable_New();
751 if (new == NULL)
752 return NULL;
753
754 new->mt_used = self->mt_used;
755 new->mt_allocated = self->mt_allocated;
756 new->mt_mask = self->mt_mask;
757 /* The table we get from _New() is probably smaller than we wanted.
758 Free it and allocate one that's the right size. */
759 PyMem_FREE(new->mt_table);
760 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
761 if (new->mt_table == NULL) {
762 PyMem_FREE(new);
763 PyErr_NoMemory();
764 return NULL;
765 }
766 for (size_t i = 0; i < self->mt_allocated; i++) {
767 Py_XINCREF(self->mt_table[i].me_key);
768 }
769 memcpy(new->mt_table, self->mt_table,
770 sizeof(PyMemoEntry) * self->mt_allocated);
771
772 return new;
773 }
774
775 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)776 PyMemoTable_Size(PyMemoTable *self)
777 {
778 return self->mt_used;
779 }
780
781 static int
PyMemoTable_Clear(PyMemoTable * self)782 PyMemoTable_Clear(PyMemoTable *self)
783 {
784 Py_ssize_t i = self->mt_allocated;
785
786 while (--i >= 0) {
787 Py_XDECREF(self->mt_table[i].me_key);
788 }
789 self->mt_used = 0;
790 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
791 return 0;
792 }
793
794 static void
PyMemoTable_Del(PyMemoTable * self)795 PyMemoTable_Del(PyMemoTable *self)
796 {
797 if (self == NULL)
798 return;
799 PyMemoTable_Clear(self);
800
801 PyMem_FREE(self->mt_table);
802 PyMem_FREE(self);
803 }
804
805 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
806 can be considerably simpler than dictobject.c's lookdict(). */
807 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)808 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
809 {
810 size_t i;
811 size_t perturb;
812 size_t mask = self->mt_mask;
813 PyMemoEntry *table = self->mt_table;
814 PyMemoEntry *entry;
815 Py_hash_t hash = (Py_hash_t)key >> 3;
816
817 i = hash & mask;
818 entry = &table[i];
819 if (entry->me_key == NULL || entry->me_key == key)
820 return entry;
821
822 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
823 i = (i << 2) + i + perturb + 1;
824 entry = &table[i & mask];
825 if (entry->me_key == NULL || entry->me_key == key)
826 return entry;
827 }
828 Py_UNREACHABLE();
829 }
830
831 /* Returns -1 on failure, 0 on success. */
832 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)833 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
834 {
835 PyMemoEntry *oldtable = NULL;
836 PyMemoEntry *oldentry, *newentry;
837 size_t new_size = MT_MINSIZE;
838 size_t to_process;
839
840 assert(min_size > 0);
841
842 if (min_size > PY_SSIZE_T_MAX) {
843 PyErr_NoMemory();
844 return -1;
845 }
846
847 /* Find the smallest valid table size >= min_size. */
848 while (new_size < min_size) {
849 new_size <<= 1;
850 }
851 /* new_size needs to be a power of two. */
852 assert((new_size & (new_size - 1)) == 0);
853
854 /* Allocate new table. */
855 oldtable = self->mt_table;
856 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
857 if (self->mt_table == NULL) {
858 self->mt_table = oldtable;
859 PyErr_NoMemory();
860 return -1;
861 }
862 self->mt_allocated = new_size;
863 self->mt_mask = new_size - 1;
864 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
865
866 /* Copy entries from the old table. */
867 to_process = self->mt_used;
868 for (oldentry = oldtable; to_process > 0; oldentry++) {
869 if (oldentry->me_key != NULL) {
870 to_process--;
871 /* newentry is a pointer to a chunk of the new
872 mt_table, so we're setting the key:value pair
873 in-place. */
874 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
875 newentry->me_key = oldentry->me_key;
876 newentry->me_value = oldentry->me_value;
877 }
878 }
879
880 /* Deallocate the old table. */
881 PyMem_FREE(oldtable);
882 return 0;
883 }
884
885 /* Returns NULL on failure, a pointer to the value otherwise. */
886 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)887 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
888 {
889 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
890 if (entry->me_key == NULL)
891 return NULL;
892 return &entry->me_value;
893 }
894
895 /* Returns -1 on failure, 0 on success. */
896 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)897 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
898 {
899 PyMemoEntry *entry;
900
901 assert(key != NULL);
902
903 entry = _PyMemoTable_Lookup(self, key);
904 if (entry->me_key != NULL) {
905 entry->me_value = value;
906 return 0;
907 }
908 Py_INCREF(key);
909 entry->me_key = key;
910 entry->me_value = value;
911 self->mt_used++;
912
913 /* If we added a key, we can safely resize. Otherwise just return!
914 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
915 *
916 * Quadrupling the size improves average table sparseness
917 * (reducing collisions) at the cost of some memory. It also halves
918 * the number of expensive resize operations in a growing memo table.
919 *
920 * Very large memo tables (over 50K items) use doubling instead.
921 * This may help applications with severe memory constraints.
922 */
923 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
924 return 0;
925 }
926 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
927 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
928 return _PyMemoTable_ResizeTable(self, desired_size);
929 }
930
931 #undef MT_MINSIZE
932 #undef PERTURB_SHIFT
933
934 /*************************************************************************/
935
936
937 static int
_Pickler_ClearBuffer(PicklerObject * self)938 _Pickler_ClearBuffer(PicklerObject *self)
939 {
940 Py_XSETREF(self->output_buffer,
941 PyBytes_FromStringAndSize(NULL, self->max_output_len));
942 if (self->output_buffer == NULL)
943 return -1;
944 self->output_len = 0;
945 self->frame_start = -1;
946 return 0;
947 }
948
/* Store `value` into out[0..7] as a 64-bit little-endian integer.
   Bytes beyond sizeof(size_t) are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    for (size_t pos = 0; pos < 8; pos++) {
        if (pos < sizeof(size_t)) {
            out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
        }
        else {
            /* Padding for platforms where size_t is narrower than 64 bits. */
            out[pos] = 0;
        }
    }
}
963
964 static int
_Pickler_CommitFrame(PicklerObject * self)965 _Pickler_CommitFrame(PicklerObject *self)
966 {
967 size_t frame_len;
968 char *qdata;
969
970 if (!self->framing || self->frame_start == -1)
971 return 0;
972 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
973 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
974 if (frame_len >= FRAME_SIZE_MIN) {
975 qdata[0] = FRAME;
976 _write_size64(qdata + 1, frame_len);
977 }
978 else {
979 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
980 self->output_len -= FRAME_HEADER_SIZE;
981 }
982 self->frame_start = -1;
983 return 0;
984 }
985
986 static PyObject *
_Pickler_GetString(PicklerObject * self)987 _Pickler_GetString(PicklerObject *self)
988 {
989 PyObject *output_buffer = self->output_buffer;
990
991 assert(self->output_buffer != NULL);
992
993 if (_Pickler_CommitFrame(self))
994 return NULL;
995
996 self->output_buffer = NULL;
997 /* Resize down to exact size */
998 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
999 return NULL;
1000 return output_buffer;
1001 }
1002
1003 static int
_Pickler_FlushToFile(PicklerObject * self)1004 _Pickler_FlushToFile(PicklerObject *self)
1005 {
1006 PyObject *output, *result;
1007
1008 assert(self->write != NULL);
1009
1010 /* This will commit the frame first */
1011 output = _Pickler_GetString(self);
1012 if (output == NULL)
1013 return -1;
1014
1015 result = _Pickle_FastCall(self->write, output);
1016 Py_XDECREF(result);
1017 return (result == NULL) ? -1 : 0;
1018 }
1019
1020 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1021 _Pickler_OpcodeBoundary(PicklerObject *self)
1022 {
1023 Py_ssize_t frame_len;
1024
1025 if (!self->framing || self->frame_start == -1) {
1026 return 0;
1027 }
1028 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1029 if (frame_len >= FRAME_SIZE_TARGET) {
1030 if(_Pickler_CommitFrame(self)) {
1031 return -1;
1032 }
1033 /* Flush the content of the committed frame to the underlying
1034 * file and reuse the pickler buffer for the next frame so as
1035 * to limit memory usage when dumping large complex objects to
1036 * a file.
1037 *
1038 * self->write is NULL when called via dumps.
1039 */
1040 if (self->write != NULL) {
1041 if (_Pickler_FlushToFile(self) < 0) {
1042 return -1;
1043 }
1044 if (_Pickler_ClearBuffer(self) < 0) {
1045 return -1;
1046 }
1047 }
1048 }
1049 return 0;
1050 }
1051
1052 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1053 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1054 {
1055 Py_ssize_t i, n, required;
1056 char *buffer;
1057 int need_new_frame;
1058
1059 assert(s != NULL);
1060 need_new_frame = (self->framing && self->frame_start == -1);
1061
1062 if (need_new_frame)
1063 n = data_len + FRAME_HEADER_SIZE;
1064 else
1065 n = data_len;
1066
1067 required = self->output_len + n;
1068 if (required > self->max_output_len) {
1069 /* Make place in buffer for the pickle chunk */
1070 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1071 PyErr_NoMemory();
1072 return -1;
1073 }
1074 self->max_output_len = (self->output_len + n) / 2 * 3;
1075 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1076 return -1;
1077 }
1078 buffer = PyBytes_AS_STRING(self->output_buffer);
1079 if (need_new_frame) {
1080 /* Setup new frame */
1081 Py_ssize_t frame_start = self->output_len;
1082 self->frame_start = frame_start;
1083 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1084 /* Write an invalid value, for debugging */
1085 buffer[frame_start + i] = 0xFE;
1086 }
1087 self->output_len += FRAME_HEADER_SIZE;
1088 }
1089 if (data_len < 8) {
1090 /* This is faster than memcpy when the string is short. */
1091 for (i = 0; i < data_len; i++) {
1092 buffer[self->output_len + i] = s[i];
1093 }
1094 }
1095 else {
1096 memcpy(buffer + self->output_len, s, data_len);
1097 }
1098 self->output_len += data_len;
1099 return data_len;
1100 }
1101
1102 static PicklerObject *
_Pickler_New(void)1103 _Pickler_New(void)
1104 {
1105 PicklerObject *self;
1106
1107 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1108 if (self == NULL)
1109 return NULL;
1110
1111 self->pers_func = NULL;
1112 self->dispatch_table = NULL;
1113 self->buffer_callback = NULL;
1114 self->write = NULL;
1115 self->proto = 0;
1116 self->bin = 0;
1117 self->framing = 0;
1118 self->frame_start = -1;
1119 self->fast = 0;
1120 self->fast_nesting = 0;
1121 self->fix_imports = 0;
1122 self->fast_memo = NULL;
1123 self->max_output_len = WRITE_BUF_SIZE;
1124 self->output_len = 0;
1125 self->reducer_override = NULL;
1126
1127 self->memo = PyMemoTable_New();
1128 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1129 self->max_output_len);
1130
1131 if (self->memo == NULL || self->output_buffer == NULL) {
1132 Py_DECREF(self);
1133 return NULL;
1134 }
1135
1136 PyObject_GC_Track(self);
1137 return self;
1138 }
1139
1140 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1141 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1142 {
1143 long proto;
1144
1145 if (protocol == Py_None) {
1146 proto = DEFAULT_PROTOCOL;
1147 }
1148 else {
1149 proto = PyLong_AsLong(protocol);
1150 if (proto < 0) {
1151 if (proto == -1 && PyErr_Occurred())
1152 return -1;
1153 proto = HIGHEST_PROTOCOL;
1154 }
1155 else if (proto > HIGHEST_PROTOCOL) {
1156 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1157 HIGHEST_PROTOCOL);
1158 return -1;
1159 }
1160 }
1161 self->proto = (int)proto;
1162 self->bin = proto > 0;
1163 self->fix_imports = fix_imports && proto < 3;
1164 return 0;
1165 }
1166
1167 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1168 be called once on a freshly created Pickler. */
1169 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1170 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1171 {
1172 _Py_IDENTIFIER(write);
1173 assert(file != NULL);
1174 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1175 return -1;
1176 }
1177 if (self->write == NULL) {
1178 PyErr_SetString(PyExc_TypeError,
1179 "file must have a 'write' attribute");
1180 return -1;
1181 }
1182
1183 return 0;
1184 }
1185
1186 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1187 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1188 {
1189 if (buffer_callback == Py_None) {
1190 buffer_callback = NULL;
1191 }
1192 if (buffer_callback != NULL && self->proto < 5) {
1193 PyErr_SetString(PyExc_ValueError,
1194 "buffer_callback needs protocol >= 5");
1195 return -1;
1196 }
1197
1198 Py_XINCREF(buffer_callback);
1199 self->buffer_callback = buffer_callback;
1200 return 0;
1201 }
1202
1203 /* Returns the size of the input on success, -1 on failure. This takes its
1204 own reference to `input`. */
1205 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1206 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1207 {
1208 if (self->buffer.buf != NULL)
1209 PyBuffer_Release(&self->buffer);
1210 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1211 return -1;
1212 self->input_buffer = self->buffer.buf;
1213 self->input_len = self->buffer.len;
1214 self->next_read_idx = 0;
1215 self->prefetched_idx = self->input_len;
1216 return self->input_len;
1217 }
1218
1219 static int
bad_readline(void)1220 bad_readline(void)
1221 {
1222 PickleState *st = _Pickle_GetGlobalState();
1223 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1224 return -1;
1225 }
1226
1227 /* Skip any consumed data that was only prefetched using peek() */
1228 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1229 _Unpickler_SkipConsumed(UnpicklerObject *self)
1230 {
1231 Py_ssize_t consumed;
1232 PyObject *r;
1233
1234 consumed = self->next_read_idx - self->prefetched_idx;
1235 if (consumed <= 0)
1236 return 0;
1237
1238 assert(self->peek); /* otherwise we did something wrong */
1239 /* This makes a useless copy... */
1240 r = PyObject_CallFunction(self->read, "n", consumed);
1241 if (r == NULL)
1242 return -1;
1243 Py_DECREF(r);
1244
1245 self->prefetched_idx = self->next_read_idx;
1246 return 0;
1247 }
1248
1249 static const Py_ssize_t READ_WHOLE_LINE = -1;
1250
1251 /* If reading from a file, we need to only pull the bytes we need, since there
1252 may be multiple pickle objects arranged contiguously in the same input
1253 buffer.
1254
1255 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1256 bytes from the input stream/buffer.
1257
1258 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1259 failure; on success, returns the number of bytes read from the file.
1260
1261 On success, self->input_len will be 0; this is intentional so that when
1262 unpickling from a file, the "we've run out of data" code paths will trigger,
1263 causing the Unpickler to go back to the file for more data. Use the returned
1264 size to tell you how much data you can process. */
1265 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1266 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1267 {
1268 PyObject *data;
1269 Py_ssize_t read_size;
1270
1271 assert(self->read != NULL);
1272
1273 if (_Unpickler_SkipConsumed(self) < 0)
1274 return -1;
1275
1276 if (n == READ_WHOLE_LINE) {
1277 data = _PyObject_CallNoArg(self->readline);
1278 }
1279 else {
1280 PyObject *len;
1281 /* Prefetch some data without advancing the file pointer, if possible */
1282 if (self->peek && n < PREFETCH) {
1283 len = PyLong_FromSsize_t(PREFETCH);
1284 if (len == NULL)
1285 return -1;
1286 data = _Pickle_FastCall(self->peek, len);
1287 if (data == NULL) {
1288 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1289 return -1;
1290 /* peek() is probably not supported by the given file object */
1291 PyErr_Clear();
1292 Py_CLEAR(self->peek);
1293 }
1294 else {
1295 read_size = _Unpickler_SetStringInput(self, data);
1296 Py_DECREF(data);
1297 self->prefetched_idx = 0;
1298 if (n <= read_size)
1299 return n;
1300 }
1301 }
1302 len = PyLong_FromSsize_t(n);
1303 if (len == NULL)
1304 return -1;
1305 data = _Pickle_FastCall(self->read, len);
1306 }
1307 if (data == NULL)
1308 return -1;
1309
1310 read_size = _Unpickler_SetStringInput(self, data);
1311 Py_DECREF(data);
1312 return read_size;
1313 }
1314
1315 /* Don't call it directly: use _Unpickler_Read() */
1316 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1317 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1318 {
1319 Py_ssize_t num_read;
1320
1321 *s = NULL;
1322 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1323 PickleState *st = _Pickle_GetGlobalState();
1324 PyErr_SetString(st->UnpicklingError,
1325 "read would overflow (invalid bytecode)");
1326 return -1;
1327 }
1328
1329 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1330 assert(self->next_read_idx + n > self->input_len);
1331
1332 if (!self->read)
1333 return bad_readline();
1334
1335 /* Extend the buffer to satisfy desired size */
1336 num_read = _Unpickler_ReadFromFile(self, n);
1337 if (num_read < 0)
1338 return -1;
1339 if (num_read < n)
1340 return bad_readline();
1341 *s = self->input_buffer;
1342 self->next_read_idx = n;
1343 return n;
1344 }
1345
1346 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1347 *
1348 * This should only be used for non-small data reads where potentially
1349 * avoiding a copy is beneficial. This method does not try to prefetch
1350 * more data into the input buffer.
1351 *
1352 * _Unpickler_Read() is recommended in most cases.
1353 */
1354 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1355 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1356 {
1357 assert(n != READ_WHOLE_LINE);
1358
1359 /* Read from available buffer data, if any */
1360 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1361 if (in_buffer > 0) {
1362 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1363 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1364 self->next_read_idx += to_read;
1365 buf += to_read;
1366 n -= to_read;
1367 if (n == 0) {
1368 /* Entire read was satisfied from buffer */
1369 return n;
1370 }
1371 }
1372
1373 /* Read from file */
1374 if (!self->read) {
1375 /* We're unpickling memory, this means the input is truncated */
1376 return bad_readline();
1377 }
1378 if (_Unpickler_SkipConsumed(self) < 0) {
1379 return -1;
1380 }
1381
1382 if (!self->readinto) {
1383 /* readinto() not supported on file-like object, fall back to read()
1384 * and copy into destination buffer (bpo-39681) */
1385 PyObject* len = PyLong_FromSsize_t(n);
1386 if (len == NULL) {
1387 return -1;
1388 }
1389 PyObject* data = _Pickle_FastCall(self->read, len);
1390 if (data == NULL) {
1391 return -1;
1392 }
1393 if (!PyBytes_Check(data)) {
1394 PyErr_Format(PyExc_ValueError,
1395 "read() returned non-bytes object (%R)",
1396 Py_TYPE(data));
1397 Py_DECREF(data);
1398 return -1;
1399 }
1400 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1401 if (read_size < n) {
1402 Py_DECREF(data);
1403 return bad_readline();
1404 }
1405 memcpy(buf, PyBytes_AS_STRING(data), n);
1406 Py_DECREF(data);
1407 return n;
1408 }
1409
1410 /* Call readinto() into user buffer */
1411 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1412 if (buf_obj == NULL) {
1413 return -1;
1414 }
1415 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1416 if (read_size_obj == NULL) {
1417 return -1;
1418 }
1419 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1420 Py_DECREF(read_size_obj);
1421
1422 if (read_size < 0) {
1423 if (!PyErr_Occurred()) {
1424 PyErr_SetString(PyExc_ValueError,
1425 "readinto() returned negative size");
1426 }
1427 return -1;
1428 }
1429 if (read_size < n) {
1430 return bad_readline();
1431 }
1432 return n;
1433 }
1434
/* Read `n` bytes from the unpickler's data source, storing a pointer to them
   in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly, because it also handles input that
   comes from a stream.

   Fast path: when at least `n` bytes remain in the input buffer, `*s` is
   pointed at the current read position and next_read_idx advances by `n`.
   Otherwise _Unpickler_ReadImpl() fetches more data from the file.

   Note that the arguments are evaluated more than once.

   Returns -1 (with an exception set) on failure.  On success, returns the
   number of chars read, i.e. `n`. */
#define _Unpickler_Read(self, s, n)                                     \
    (((self)->input_len - (self)->next_read_idx >= (n))                 \
        ? (*(s) = (self)->input_buffer + (self)->next_read_idx,         \
           (self)->next_read_idx += (n),                                \
           (n))                                                         \
        : _Unpickler_ReadImpl(self, (s), (n)))
1454
1455 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1456 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1457 char **result)
1458 {
1459 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1460 if (input_line == NULL) {
1461 PyErr_NoMemory();
1462 return -1;
1463 }
1464
1465 memcpy(input_line, line, len);
1466 input_line[len] = '\0';
1467 self->input_line = input_line;
1468 *result = self->input_line;
1469 return len;
1470 }
1471
1472 /* Read a line from the input stream/buffer. If we run off the end of the input
1473 before hitting \n, raise an error.
1474
1475 Returns the number of chars read, or -1 on failure. */
1476 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1477 _Unpickler_Readline(UnpicklerObject *self, char **result)
1478 {
1479 Py_ssize_t i, num_read;
1480
1481 for (i = self->next_read_idx; i < self->input_len; i++) {
1482 if (self->input_buffer[i] == '\n') {
1483 char *line_start = self->input_buffer + self->next_read_idx;
1484 num_read = i - self->next_read_idx + 1;
1485 self->next_read_idx = i + 1;
1486 return _Unpickler_CopyLine(self, line_start, num_read, result);
1487 }
1488 }
1489 if (!self->read)
1490 return bad_readline();
1491
1492 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1493 if (num_read < 0)
1494 return -1;
1495 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1496 return bad_readline();
1497 self->next_read_idx = num_read;
1498 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1499 }
1500
1501 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1502 will be modified in place. */
1503 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1504 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1505 {
1506 size_t i;
1507
1508 assert(new_size > self->memo_size);
1509
1510 PyObject **memo_new = self->memo;
1511 PyMem_RESIZE(memo_new, PyObject *, new_size);
1512 if (memo_new == NULL) {
1513 PyErr_NoMemory();
1514 return -1;
1515 }
1516 self->memo = memo_new;
1517 for (i = self->memo_size; i < new_size; i++)
1518 self->memo[i] = NULL;
1519 self->memo_size = new_size;
1520 return 0;
1521 }
1522
1523 /* Returns NULL if idx is out of bounds. */
1524 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1525 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1526 {
1527 if (idx >= self->memo_size)
1528 return NULL;
1529
1530 return self->memo[idx];
1531 }
1532
1533 /* Returns -1 (with an exception set) on failure, 0 on success.
1534 This takes its own reference to `value`. */
1535 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1536 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1537 {
1538 PyObject *old_item;
1539
1540 if (idx >= self->memo_size) {
1541 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1542 return -1;
1543 assert(idx < self->memo_size);
1544 }
1545 Py_INCREF(value);
1546 old_item = self->memo[idx];
1547 self->memo[idx] = value;
1548 if (old_item != NULL) {
1549 Py_DECREF(old_item);
1550 }
1551 else {
1552 self->memo_len++;
1553 }
1554 return 0;
1555 }
1556
1557 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1558 _Unpickler_NewMemo(Py_ssize_t new_size)
1559 {
1560 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1561 if (memo == NULL) {
1562 PyErr_NoMemory();
1563 return NULL;
1564 }
1565 memset(memo, 0, new_size * sizeof(PyObject *));
1566 return memo;
1567 }
1568
1569 /* Free the unpickler's memo, taking care to decref any items left in it. */
1570 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1571 _Unpickler_MemoCleanup(UnpicklerObject *self)
1572 {
1573 Py_ssize_t i;
1574 PyObject **memo = self->memo;
1575
1576 if (self->memo == NULL)
1577 return;
1578 self->memo = NULL;
1579 i = self->memo_size;
1580 while (--i >= 0) {
1581 Py_XDECREF(memo[i]);
1582 }
1583 PyMem_FREE(memo);
1584 }
1585
1586 static UnpicklerObject *
_Unpickler_New(void)1587 _Unpickler_New(void)
1588 {
1589 UnpicklerObject *self;
1590
1591 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1592 if (self == NULL)
1593 return NULL;
1594
1595 self->pers_func = NULL;
1596 self->input_buffer = NULL;
1597 self->input_line = NULL;
1598 self->input_len = 0;
1599 self->next_read_idx = 0;
1600 self->prefetched_idx = 0;
1601 self->read = NULL;
1602 self->readinto = NULL;
1603 self->readline = NULL;
1604 self->peek = NULL;
1605 self->buffers = NULL;
1606 self->encoding = NULL;
1607 self->errors = NULL;
1608 self->marks = NULL;
1609 self->num_marks = 0;
1610 self->marks_size = 0;
1611 self->proto = 0;
1612 self->fix_imports = 0;
1613 memset(&self->buffer, 0, sizeof(Py_buffer));
1614 self->memo_size = 32;
1615 self->memo_len = 0;
1616 self->memo = _Unpickler_NewMemo(self->memo_size);
1617 self->stack = (Pdata *)Pdata_New();
1618
1619 if (self->memo == NULL || self->stack == NULL) {
1620 Py_DECREF(self);
1621 return NULL;
1622 }
1623
1624 PyObject_GC_Track(self);
1625 return self;
1626 }
1627
1628 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1629 be called once on a freshly created Unpickler. */
1630 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1631 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1632 {
1633 _Py_IDENTIFIER(peek);
1634 _Py_IDENTIFIER(read);
1635 _Py_IDENTIFIER(readinto);
1636 _Py_IDENTIFIER(readline);
1637
1638 /* Optional file methods */
1639 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1640 return -1;
1641 }
1642 if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1643 return -1;
1644 }
1645 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1646 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
1647 if (!self->readline || !self->read) {
1648 if (!PyErr_Occurred()) {
1649 PyErr_SetString(PyExc_TypeError,
1650 "file must have 'read' and 'readline' attributes");
1651 }
1652 Py_CLEAR(self->read);
1653 Py_CLEAR(self->readinto);
1654 Py_CLEAR(self->readline);
1655 Py_CLEAR(self->peek);
1656 return -1;
1657 }
1658 return 0;
1659 }
1660
1661 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1662 be called once on a freshly created Unpickler. */
1663 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1664 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1665 const char *encoding,
1666 const char *errors)
1667 {
1668 if (encoding == NULL)
1669 encoding = "ASCII";
1670 if (errors == NULL)
1671 errors = "strict";
1672
1673 self->encoding = _PyMem_Strdup(encoding);
1674 self->errors = _PyMem_Strdup(errors);
1675 if (self->encoding == NULL || self->errors == NULL) {
1676 PyErr_NoMemory();
1677 return -1;
1678 }
1679 return 0;
1680 }
1681
1682 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1683 be called once on a freshly created Unpickler. */
1684 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1685 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1686 {
1687 if (buffers == NULL || buffers == Py_None) {
1688 self->buffers = NULL;
1689 }
1690 else {
1691 self->buffers = PyObject_GetIter(buffers);
1692 if (self->buffers == NULL) {
1693 return -1;
1694 }
1695 }
1696 return 0;
1697 }
1698
1699 /* Generate a GET opcode for an object stored in the memo. */
1700 static int
memo_get(PicklerObject * self,PyObject * key)1701 memo_get(PicklerObject *self, PyObject *key)
1702 {
1703 Py_ssize_t *value;
1704 char pdata[30];
1705 Py_ssize_t len;
1706
1707 value = PyMemoTable_Get(self->memo, key);
1708 if (value == NULL) {
1709 PyErr_SetObject(PyExc_KeyError, key);
1710 return -1;
1711 }
1712
1713 if (!self->bin) {
1714 pdata[0] = GET;
1715 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1716 "%" PY_FORMAT_SIZE_T "d\n", *value);
1717 len = strlen(pdata);
1718 }
1719 else {
1720 if (*value < 256) {
1721 pdata[0] = BINGET;
1722 pdata[1] = (unsigned char)(*value & 0xff);
1723 len = 2;
1724 }
1725 else if ((size_t)*value <= 0xffffffffUL) {
1726 pdata[0] = LONG_BINGET;
1727 pdata[1] = (unsigned char)(*value & 0xff);
1728 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1729 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1730 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1731 len = 5;
1732 }
1733 else { /* unlikely */
1734 PickleState *st = _Pickle_GetGlobalState();
1735 PyErr_SetString(st->PicklingError,
1736 "memo id too large for LONG_BINGET");
1737 return -1;
1738 }
1739 }
1740
1741 if (_Pickler_Write(self, pdata, len) < 0)
1742 return -1;
1743
1744 return 0;
1745 }
1746
1747 /* Store an object in the memo, assign it a new unique ID based on the number
1748 of objects currently stored in the memo and generate a PUT opcode. */
1749 static int
memo_put(PicklerObject * self,PyObject * obj)1750 memo_put(PicklerObject *self, PyObject *obj)
1751 {
1752 char pdata[30];
1753 Py_ssize_t len;
1754 Py_ssize_t idx;
1755
1756 const char memoize_op = MEMOIZE;
1757
1758 if (self->fast)
1759 return 0;
1760
1761 idx = PyMemoTable_Size(self->memo);
1762 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1763 return -1;
1764
1765 if (self->proto >= 4) {
1766 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1767 return -1;
1768 return 0;
1769 }
1770 else if (!self->bin) {
1771 pdata[0] = PUT;
1772 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1773 "%" PY_FORMAT_SIZE_T "d\n", idx);
1774 len = strlen(pdata);
1775 }
1776 else {
1777 if (idx < 256) {
1778 pdata[0] = BINPUT;
1779 pdata[1] = (unsigned char)idx;
1780 len = 2;
1781 }
1782 else if ((size_t)idx <= 0xffffffffUL) {
1783 pdata[0] = LONG_BINPUT;
1784 pdata[1] = (unsigned char)(idx & 0xff);
1785 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1786 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1787 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1788 len = 5;
1789 }
1790 else { /* unlikely */
1791 PickleState *st = _Pickle_GetGlobalState();
1792 PyErr_SetString(st->PicklingError,
1793 "memo id too large for LONG_BINPUT");
1794 return -1;
1795 }
1796 }
1797 if (_Pickler_Write(self, pdata, len) < 0)
1798 return -1;
1799
1800 return 0;
1801 }
1802
1803 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1804 get_dotted_path(PyObject *obj, PyObject *name)
1805 {
1806 _Py_static_string(PyId_dot, ".");
1807 PyObject *dotted_path;
1808 Py_ssize_t i, n;
1809
1810 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1811 if (dotted_path == NULL)
1812 return NULL;
1813 n = PyList_GET_SIZE(dotted_path);
1814 assert(n >= 1);
1815 for (i = 0; i < n; i++) {
1816 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1817 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1818 if (obj == NULL)
1819 PyErr_Format(PyExc_AttributeError,
1820 "Can't pickle local object %R", name);
1821 else
1822 PyErr_Format(PyExc_AttributeError,
1823 "Can't pickle local attribute %R on %R", name, obj);
1824 Py_DECREF(dotted_path);
1825 return NULL;
1826 }
1827 }
1828 return dotted_path;
1829 }
1830
1831 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1832 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1833 {
1834 Py_ssize_t i, n;
1835 PyObject *parent = NULL;
1836
1837 assert(PyList_CheckExact(names));
1838 Py_INCREF(obj);
1839 n = PyList_GET_SIZE(names);
1840 for (i = 0; i < n; i++) {
1841 PyObject *name = PyList_GET_ITEM(names, i);
1842 Py_XDECREF(parent);
1843 parent = obj;
1844 (void)_PyObject_LookupAttr(parent, name, &obj);
1845 if (obj == NULL) {
1846 Py_DECREF(parent);
1847 return NULL;
1848 }
1849 }
1850 if (pparent != NULL)
1851 *pparent = parent;
1852 else
1853 Py_XDECREF(parent);
1854 return obj;
1855 }
1856
1857
1858 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1859 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1860 {
1861 PyObject *dotted_path, *attr;
1862
1863 if (allow_qualname) {
1864 dotted_path = get_dotted_path(obj, name);
1865 if (dotted_path == NULL)
1866 return NULL;
1867 attr = get_deep_attribute(obj, dotted_path, NULL);
1868 Py_DECREF(dotted_path);
1869 }
1870 else {
1871 (void)_PyObject_LookupAttr(obj, name, &attr);
1872 }
1873 if (attr == NULL && !PyErr_Occurred()) {
1874 PyErr_Format(PyExc_AttributeError,
1875 "Can't get attribute %R on %R", name, obj);
1876 }
1877 return attr;
1878 }
1879
1880 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1881 _checkmodule(PyObject *module_name, PyObject *module,
1882 PyObject *global, PyObject *dotted_path)
1883 {
1884 if (module == Py_None) {
1885 return -1;
1886 }
1887 if (PyUnicode_Check(module_name) &&
1888 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1889 return -1;
1890 }
1891
1892 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1893 if (candidate == NULL) {
1894 return -1;
1895 }
1896 if (candidate != global) {
1897 Py_DECREF(candidate);
1898 return -1;
1899 }
1900 Py_DECREF(candidate);
1901 return 0;
1902 }
1903
1904 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1905 whichmodule(PyObject *global, PyObject *dotted_path)
1906 {
1907 PyObject *module_name;
1908 PyObject *module = NULL;
1909 Py_ssize_t i;
1910 PyObject *modules;
1911 _Py_IDENTIFIER(__module__);
1912 _Py_IDENTIFIER(modules);
1913 _Py_IDENTIFIER(__main__);
1914
1915 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1916 return NULL;
1917 }
1918 if (module_name) {
1919 /* In some rare cases (e.g., bound methods of extension types),
1920 __module__ can be None. If it is so, then search sys.modules for
1921 the module of global. */
1922 if (module_name != Py_None)
1923 return module_name;
1924 Py_CLEAR(module_name);
1925 }
1926 assert(module_name == NULL);
1927
1928 /* Fallback on walking sys.modules */
1929 modules = _PySys_GetObjectId(&PyId_modules);
1930 if (modules == NULL) {
1931 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1932 return NULL;
1933 }
1934 if (PyDict_CheckExact(modules)) {
1935 i = 0;
1936 while (PyDict_Next(modules, &i, &module_name, &module)) {
1937 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1938 Py_INCREF(module_name);
1939 return module_name;
1940 }
1941 if (PyErr_Occurred()) {
1942 return NULL;
1943 }
1944 }
1945 }
1946 else {
1947 PyObject *iterator = PyObject_GetIter(modules);
1948 if (iterator == NULL) {
1949 return NULL;
1950 }
1951 while ((module_name = PyIter_Next(iterator))) {
1952 module = PyObject_GetItem(modules, module_name);
1953 if (module == NULL) {
1954 Py_DECREF(module_name);
1955 Py_DECREF(iterator);
1956 return NULL;
1957 }
1958 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1959 Py_DECREF(module);
1960 Py_DECREF(iterator);
1961 return module_name;
1962 }
1963 Py_DECREF(module);
1964 Py_DECREF(module_name);
1965 if (PyErr_Occurred()) {
1966 Py_DECREF(iterator);
1967 return NULL;
1968 }
1969 }
1970 Py_DECREF(iterator);
1971 }
1972
1973 /* If no module is found, use __main__. */
1974 module_name = _PyUnicode_FromId(&PyId___main__);
1975 Py_XINCREF(module_name);
1976 return module_name;
1977 }
1978
1979 /* fast_save_enter() and fast_save_leave() are guards against recursive
1980 objects when Pickler is used with the "fast mode" (i.e., with object
1981 memoization disabled). If the nesting of a list or dict object exceed
1982 FAST_NESTING_LIMIT, these guards will start keeping an internal
1983 reference to the seen list or dict objects and check whether these objects
1984 are recursive. These are not strictly necessary, since save() has a
1985 hard-coded recursion limit, but they give a nicer error message than the
1986 typical RuntimeError. */
1987 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1988 fast_save_enter(PicklerObject *self, PyObject *obj)
1989 {
1990 /* if fast_nesting < 0, we're doing an error exit. */
1991 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1992 PyObject *key = NULL;
1993 if (self->fast_memo == NULL) {
1994 self->fast_memo = PyDict_New();
1995 if (self->fast_memo == NULL) {
1996 self->fast_nesting = -1;
1997 return 0;
1998 }
1999 }
2000 key = PyLong_FromVoidPtr(obj);
2001 if (key == NULL) {
2002 self->fast_nesting = -1;
2003 return 0;
2004 }
2005 if (PyDict_GetItemWithError(self->fast_memo, key)) {
2006 Py_DECREF(key);
2007 PyErr_Format(PyExc_ValueError,
2008 "fast mode: can't pickle cyclic objects "
2009 "including object type %.200s at %p",
2010 obj->ob_type->tp_name, obj);
2011 self->fast_nesting = -1;
2012 return 0;
2013 }
2014 if (PyErr_Occurred()) {
2015 Py_DECREF(key);
2016 self->fast_nesting = -1;
2017 return 0;
2018 }
2019 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
2020 Py_DECREF(key);
2021 self->fast_nesting = -1;
2022 return 0;
2023 }
2024 Py_DECREF(key);
2025 }
2026 return 1;
2027 }
2028
2029 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2030 fast_save_leave(PicklerObject *self, PyObject *obj)
2031 {
2032 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2033 PyObject *key = PyLong_FromVoidPtr(obj);
2034 if (key == NULL)
2035 return 0;
2036 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2037 Py_DECREF(key);
2038 return 0;
2039 }
2040 Py_DECREF(key);
2041 }
2042 return 1;
2043 }
2044
2045 static int
save_none(PicklerObject * self,PyObject * obj)2046 save_none(PicklerObject *self, PyObject *obj)
2047 {
2048 const char none_op = NONE;
2049 if (_Pickler_Write(self, &none_op, 1) < 0)
2050 return -1;
2051
2052 return 0;
2053 }
2054
2055 static int
save_bool(PicklerObject * self,PyObject * obj)2056 save_bool(PicklerObject *self, PyObject *obj)
2057 {
2058 if (self->proto >= 2) {
2059 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2060 if (_Pickler_Write(self, &bool_op, 1) < 0)
2061 return -1;
2062 }
2063 else {
2064 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2065 * so that unpicklers written before bools were introduced unpickle them
2066 * as ints, but unpicklers after can recognize that bools were intended.
2067 * Note that protocol 2 added direct ways to pickle bools.
2068 */
2069 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2070 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2071 return -1;
2072 }
2073 return 0;
2074 }
2075
/* Pickle a Python int.
 *
 * Values that fit in a signed 4-byte integer are written as BININT,
 * BININT2 or BININT1 when self->bin is set, and as the textual INT opcode
 * otherwise.  Larger values are written as LONG1/LONG4 for protocol 2 and
 * later, or as the textual LONG opcode for older protocols.
 * Returns 0 on success and -1 on failure.
 */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val;
    int overflow;
    int status = 0;

    val= PyLong_AsLongAndOverflow(obj, &overflow);
    if (!overflow && (sizeof(long) <= 4 ||
            (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
    {
        /* result fits in a signed 4-byte integer.

           Note: we can't use -0x80000000L in the above condition because some
           compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
           before applying the unary minus when sizeof(long) <= 4. The
           resulting value stays unsigned which is commonly not what we want,
           so MSVC happily warns us about it.  However, that result would have
           been fine because we guard for sizeof(long) <= 4 which turns the
           condition true in that particular case. */
        char pdata[32];
        Py_ssize_t len = 0;

        if (self->bin) {
            /* Lay out the value as 4 little-endian bytes after the opcode. */
            pdata[1] = (unsigned char)(val & 0xff);
            pdata[2] = (unsigned char)((val >> 8) & 0xff);
            pdata[3] = (unsigned char)((val >> 16) & 0xff);
            pdata[4] = (unsigned char)((val >> 24) & 0xff);

            /* Pick the shortest opcode: the high bytes are only written
               when at least one of them is non-zero. */
            if ((pdata[4] != 0) || (pdata[3] != 0)) {
                pdata[0] = BININT;
                len = 5;
            }
            else if (pdata[2] != 0) {
                pdata[0] = BININT2;
                len = 3;
            }
            else {
                pdata[0] = BININT1;
                len = 2;
            }
        }
        else {
            /* Text mode: opcode, decimal digits, newline. */
            sprintf(pdata, "%c%ld\n", INT, val);
            len = strlen(pdata);
        }
        if (_Pickler_Write(self, pdata, len) < 0)
            return -1;

        return 0;
    }
    /* From here on the value did not take the 4-byte path above. */
    assert(!PyErr_Occurred());

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (_Pickler_Write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is ints
         * of the form -(2**(8*j-1)) for j > 0.  Such an int is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > 0x7fffffffL) {
            PyErr_SetString(PyExc_OverflowError,
                            "int too large to pickle");
            goto error;
        }
        /* repr is used here as a scratch bytes object for the raw bytes. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the int is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            /* LONG1: one-byte length. */
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* LONG4: four-byte little-endian length.  size is used as a
               scratch copy of nbytes while encoding, then set to the
               header length. */
            header[0] = LONG4;
            size = (Py_ssize_t) nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (_Pickler_Write(self, header, size) < 0 ||
            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        const char long_op = LONG;
        const char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = PyUnicode_AsUTF8AndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (_Pickler_Write(self, &long_op, 1) < 0 ||
            _Pickler_Write(self, string, size) < 0 ||
            _Pickler_Write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The error label sits inside a never-taken branch, so the success path
       falls through to the shared cleanup with status still 0. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
2238
2239 static int
save_float(PicklerObject * self,PyObject * obj)2240 save_float(PicklerObject *self, PyObject *obj)
2241 {
2242 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2243
2244 if (self->bin) {
2245 char pdata[9];
2246 pdata[0] = BINFLOAT;
2247 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2248 return -1;
2249 if (_Pickler_Write(self, pdata, 9) < 0)
2250 return -1;
2251 }
2252 else {
2253 int result = -1;
2254 char *buf = NULL;
2255 char op = FLOAT;
2256
2257 if (_Pickler_Write(self, &op, 1) < 0)
2258 goto done;
2259
2260 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2261 if (!buf) {
2262 PyErr_NoMemory();
2263 goto done;
2264 }
2265
2266 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2267 goto done;
2268
2269 if (_Pickler_Write(self, "\n", 1) < 0)
2270 goto done;
2271
2272 result = 0;
2273 done:
2274 PyMem_Free(buf);
2275 return result;
2276 }
2277
2278 return 0;
2279 }
2280
2281 /* Perform direct write of the header and payload of the binary object.
2282
2283 The large contiguous data is written directly into the underlying file
2284 object, bypassing the output_buffer of the Pickler. We intentionally
2285 do not insert a protocol 4 frame opcode to make it possible to optimize
2286 file.read calls in the loader.
2287 */
2288 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2289 _Pickler_write_bytes(PicklerObject *self,
2290 const char *header, Py_ssize_t header_size,
2291 const char *data, Py_ssize_t data_size,
2292 PyObject *payload)
2293 {
2294 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2295 int framing = self->framing;
2296
2297 if (bypass_buffer) {
2298 assert(self->output_buffer != NULL);
2299 /* Commit the previous frame. */
2300 if (_Pickler_CommitFrame(self)) {
2301 return -1;
2302 }
2303 /* Disable framing temporarily */
2304 self->framing = 0;
2305 }
2306
2307 if (_Pickler_Write(self, header, header_size) < 0) {
2308 return -1;
2309 }
2310
2311 if (bypass_buffer && self->write != NULL) {
2312 /* Bypass the in-memory buffer to directly stream large data
2313 into the underlying file object. */
2314 PyObject *result, *mem = NULL;
2315 /* Dump the output buffer to the file. */
2316 if (_Pickler_FlushToFile(self) < 0) {
2317 return -1;
2318 }
2319
2320 /* Stream write the payload into the file without going through the
2321 output buffer. */
2322 if (payload == NULL) {
2323 /* TODO: It would be better to use a memoryview with a linked
2324 original string if this is possible. */
2325 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2326 if (payload == NULL) {
2327 return -1;
2328 }
2329 }
2330 result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
2331 Py_XDECREF(mem);
2332 if (result == NULL) {
2333 return -1;
2334 }
2335 Py_DECREF(result);
2336
2337 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2338 if (_Pickler_ClearBuffer(self) < 0) {
2339 return -1;
2340 }
2341 }
2342 else {
2343 if (_Pickler_Write(self, data, data_size) < 0) {
2344 return -1;
2345 }
2346 }
2347
2348 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2349 self->framing = framing;
2350
2351 return 0;
2352 }
2353
2354 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2355 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2356 Py_ssize_t size)
2357 {
2358 assert(self->proto >= 3);
2359
2360 char header[9];
2361 Py_ssize_t len;
2362
2363 if (size < 0)
2364 return -1;
2365
2366 if (size <= 0xff) {
2367 header[0] = SHORT_BINBYTES;
2368 header[1] = (unsigned char)size;
2369 len = 2;
2370 }
2371 else if ((size_t)size <= 0xffffffffUL) {
2372 header[0] = BINBYTES;
2373 header[1] = (unsigned char)(size & 0xff);
2374 header[2] = (unsigned char)((size >> 8) & 0xff);
2375 header[3] = (unsigned char)((size >> 16) & 0xff);
2376 header[4] = (unsigned char)((size >> 24) & 0xff);
2377 len = 5;
2378 }
2379 else if (self->proto >= 4) {
2380 header[0] = BINBYTES8;
2381 _write_size64(header + 1, size);
2382 len = 9;
2383 }
2384 else {
2385 PyErr_SetString(PyExc_OverflowError,
2386 "serializing a bytes object larger than 4 GiB "
2387 "requires pickle protocol 4 or higher");
2388 return -1;
2389 }
2390
2391 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2392 return -1;
2393 }
2394
2395 if (memo_put(self, obj) < 0) {
2396 return -1;
2397 }
2398
2399 return 0;
2400 }
2401
2402 static int
save_bytes(PicklerObject * self,PyObject * obj)2403 save_bytes(PicklerObject *self, PyObject *obj)
2404 {
2405 if (self->proto < 3) {
2406 /* Older pickle protocols do not have an opcode for pickling bytes
2407 objects. Therefore, we need to fake the copy protocol (i.e.,
2408 the __reduce__ method) to permit bytes object unpickling.
2409
2410 Here we use a hack to be compatible with Python 2. Since in Python
2411 2 'bytes' is just an alias for 'str' (which has different
2412 parameters than the actual bytes object), we use codecs.encode
2413 to create the appropriate 'str' object when unpickled using
2414 Python 2 *and* the appropriate 'bytes' object when unpickled
2415 using Python 3. Again this is a hack and we don't need to do this
2416 with newer protocols. */
2417 PyObject *reduce_value;
2418 int status;
2419
2420 if (PyBytes_GET_SIZE(obj) == 0) {
2421 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2422 }
2423 else {
2424 PickleState *st = _Pickle_GetGlobalState();
2425 PyObject *unicode_str =
2426 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2427 PyBytes_GET_SIZE(obj),
2428 "strict");
2429 _Py_IDENTIFIER(latin1);
2430
2431 if (unicode_str == NULL)
2432 return -1;
2433 reduce_value = Py_BuildValue("(O(OO))",
2434 st->codecs_encode, unicode_str,
2435 _PyUnicode_FromId(&PyId_latin1));
2436 Py_DECREF(unicode_str);
2437 }
2438
2439 if (reduce_value == NULL)
2440 return -1;
2441
2442 /* save_reduce() will memoize the object automatically. */
2443 status = save_reduce(self, reduce_value, obj);
2444 Py_DECREF(reduce_value);
2445 return status;
2446 }
2447 else {
2448 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2449 PyBytes_GET_SIZE(obj));
2450 }
2451 }
2452
2453 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2454 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2455 Py_ssize_t size)
2456 {
2457 assert(self->proto >= 5);
2458
2459 char header[9];
2460 Py_ssize_t len;
2461
2462 if (size < 0)
2463 return -1;
2464
2465 header[0] = BYTEARRAY8;
2466 _write_size64(header + 1, size);
2467 len = 9;
2468
2469 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2470 return -1;
2471 }
2472
2473 if (memo_put(self, obj) < 0) {
2474 return -1;
2475 }
2476
2477 return 0;
2478 }
2479
2480 static int
save_bytearray(PicklerObject * self,PyObject * obj)2481 save_bytearray(PicklerObject *self, PyObject *obj)
2482 {
2483 if (self->proto < 5) {
2484 /* Older pickle protocols do not have an opcode for pickling
2485 * bytearrays. */
2486 PyObject *reduce_value = NULL;
2487 int status;
2488
2489 if (PyByteArray_GET_SIZE(obj) == 0) {
2490 reduce_value = Py_BuildValue("(O())",
2491 (PyObject *) &PyByteArray_Type);
2492 }
2493 else {
2494 PyObject *bytes_obj = PyBytes_FromObject(obj);
2495 if (bytes_obj != NULL) {
2496 reduce_value = Py_BuildValue("(O(O))",
2497 (PyObject *) &PyByteArray_Type,
2498 bytes_obj);
2499 Py_DECREF(bytes_obj);
2500 }
2501 }
2502 if (reduce_value == NULL)
2503 return -1;
2504
2505 /* save_reduce() will memoize the object automatically. */
2506 status = save_reduce(self, reduce_value, obj);
2507 Py_DECREF(reduce_value);
2508 return status;
2509 }
2510 else {
2511 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2512 PyByteArray_GET_SIZE(obj));
2513 }
2514 }
2515
2516 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2517 save_picklebuffer(PicklerObject *self, PyObject *obj)
2518 {
2519 if (self->proto < 5) {
2520 PickleState *st = _Pickle_GetGlobalState();
2521 PyErr_SetString(st->PicklingError,
2522 "PickleBuffer can only pickled with protocol >= 5");
2523 return -1;
2524 }
2525 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2526 if (view == NULL) {
2527 return -1;
2528 }
2529 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2530 PickleState *st = _Pickle_GetGlobalState();
2531 PyErr_SetString(st->PicklingError,
2532 "PickleBuffer can not be pickled when "
2533 "pointing to a non-contiguous buffer");
2534 return -1;
2535 }
2536 int in_band = 1;
2537 if (self->buffer_callback != NULL) {
2538 PyObject *ret = PyObject_CallFunctionObjArgs(self->buffer_callback,
2539 obj, NULL);
2540 if (ret == NULL) {
2541 return -1;
2542 }
2543 in_band = PyObject_IsTrue(ret);
2544 Py_DECREF(ret);
2545 if (in_band == -1) {
2546 return -1;
2547 }
2548 }
2549 if (in_band) {
2550 /* Write data in-band */
2551 if (view->readonly) {
2552 return _save_bytes_data(self, obj, (const char*) view->buf,
2553 view->len);
2554 }
2555 else {
2556 return _save_bytearray_data(self, obj, (const char*) view->buf,
2557 view->len);
2558 }
2559 }
2560 else {
2561 /* Write data out-of-band */
2562 const char next_buffer_op = NEXT_BUFFER;
2563 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2564 return -1;
2565 }
2566 if (view->readonly) {
2567 const char readonly_buffer_op = READONLY_BUFFER;
2568 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2569 return -1;
2570 }
2571 }
2572 }
2573 return 0;
2574 }
2575
2576 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2577 backslash and newline characters to \uXXXX escapes. */
2578 static PyObject *
raw_unicode_escape(PyObject * obj)2579 raw_unicode_escape(PyObject *obj)
2580 {
2581 char *p;
2582 Py_ssize_t i, size;
2583 void *data;
2584 unsigned int kind;
2585 _PyBytesWriter writer;
2586
2587 if (PyUnicode_READY(obj))
2588 return NULL;
2589
2590 _PyBytesWriter_Init(&writer);
2591
2592 size = PyUnicode_GET_LENGTH(obj);
2593 data = PyUnicode_DATA(obj);
2594 kind = PyUnicode_KIND(obj);
2595
2596 p = _PyBytesWriter_Alloc(&writer, size);
2597 if (p == NULL)
2598 goto error;
2599 writer.overallocate = 1;
2600
2601 for (i=0; i < size; i++) {
2602 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2603 /* Map 32-bit characters to '\Uxxxxxxxx' */
2604 if (ch >= 0x10000) {
2605 /* -1: subtract 1 preallocated byte */
2606 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2607 if (p == NULL)
2608 goto error;
2609
2610 *p++ = '\\';
2611 *p++ = 'U';
2612 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2613 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2614 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2615 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2616 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2617 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2618 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2619 *p++ = Py_hexdigits[ch & 15];
2620 }
2621 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2622 else if (ch >= 256 ||
2623 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2624 ch == 0x1a)
2625 {
2626 /* -1: subtract 1 preallocated byte */
2627 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2628 if (p == NULL)
2629 goto error;
2630
2631 *p++ = '\\';
2632 *p++ = 'u';
2633 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2634 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2635 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2636 *p++ = Py_hexdigits[ch & 15];
2637 }
2638 /* Copy everything else as-is */
2639 else
2640 *p++ = (char) ch;
2641 }
2642
2643 return _PyBytesWriter_Finish(&writer, p);
2644
2645 error:
2646 _PyBytesWriter_Dealloc(&writer);
2647 return NULL;
2648 }
2649
2650 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2651 write_unicode_binary(PicklerObject *self, PyObject *obj)
2652 {
2653 char header[9];
2654 Py_ssize_t len;
2655 PyObject *encoded = NULL;
2656 Py_ssize_t size;
2657 const char *data;
2658
2659 if (PyUnicode_READY(obj))
2660 return -1;
2661
2662 data = PyUnicode_AsUTF8AndSize(obj, &size);
2663 if (data == NULL) {
2664 /* Issue #8383: for strings with lone surrogates, fallback on the
2665 "surrogatepass" error handler. */
2666 PyErr_Clear();
2667 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2668 if (encoded == NULL)
2669 return -1;
2670
2671 data = PyBytes_AS_STRING(encoded);
2672 size = PyBytes_GET_SIZE(encoded);
2673 }
2674
2675 assert(size >= 0);
2676 if (size <= 0xff && self->proto >= 4) {
2677 header[0] = SHORT_BINUNICODE;
2678 header[1] = (unsigned char)(size & 0xff);
2679 len = 2;
2680 }
2681 else if ((size_t)size <= 0xffffffffUL) {
2682 header[0] = BINUNICODE;
2683 header[1] = (unsigned char)(size & 0xff);
2684 header[2] = (unsigned char)((size >> 8) & 0xff);
2685 header[3] = (unsigned char)((size >> 16) & 0xff);
2686 header[4] = (unsigned char)((size >> 24) & 0xff);
2687 len = 5;
2688 }
2689 else if (self->proto >= 4) {
2690 header[0] = BINUNICODE8;
2691 _write_size64(header + 1, size);
2692 len = 9;
2693 }
2694 else {
2695 PyErr_SetString(PyExc_OverflowError,
2696 "serializing a string larger than 4 GiB "
2697 "requires pickle protocol 4 or higher");
2698 Py_XDECREF(encoded);
2699 return -1;
2700 }
2701
2702 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2703 Py_XDECREF(encoded);
2704 return -1;
2705 }
2706 Py_XDECREF(encoded);
2707 return 0;
2708 }
2709
2710 static int
save_unicode(PicklerObject * self,PyObject * obj)2711 save_unicode(PicklerObject *self, PyObject *obj)
2712 {
2713 if (self->bin) {
2714 if (write_unicode_binary(self, obj) < 0)
2715 return -1;
2716 }
2717 else {
2718 PyObject *encoded;
2719 Py_ssize_t size;
2720 const char unicode_op = UNICODE;
2721
2722 encoded = raw_unicode_escape(obj);
2723 if (encoded == NULL)
2724 return -1;
2725
2726 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2727 Py_DECREF(encoded);
2728 return -1;
2729 }
2730
2731 size = PyBytes_GET_SIZE(encoded);
2732 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2733 Py_DECREF(encoded);
2734 return -1;
2735 }
2736 Py_DECREF(encoded);
2737
2738 if (_Pickler_Write(self, "\n", 1) < 0)
2739 return -1;
2740 }
2741 if (memo_put(self, obj) < 0)
2742 return -1;
2743
2744 return 0;
2745 }
2746
2747 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2748 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2749 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2750 {
2751 Py_ssize_t i;
2752
2753 assert(PyTuple_Size(t) == len);
2754
2755 for (i = 0; i < len; i++) {
2756 PyObject *element = PyTuple_GET_ITEM(t, i);
2757
2758 if (element == NULL)
2759 return -1;
2760 if (save(self, element, 0) < 0)
2761 return -1;
2762 }
2763
2764 return 0;
2765 }
2766
/* Tuples are ubiquitous in the pickle protocols, so many techniques are
 * used across protocols to minimize the space needed to pickle them.
 * Tuples are also the only builtin immutable type that can be recursive
 * (a tuple can be reached from itself), and that requires some subtle
 * magic so that it works in all cases.  IOW, this is a long routine.
 *
 * Output forms:
 *   - empty tuple: EMPTY_TUPLE (proto > 0) or MARK TUPLE (proto 0);
 *   - 1 to 3 items with proto >= 2: items followed by TUPLE1/2/3;
 *   - otherwise: MARK, items, TUPLE.
 * Non-empty tuples are memoized after being written.
 * Returns 0 on success, -1 on failure.
 */
static int
save_tuple(PicklerObject *self, PyObject *obj)
{
    Py_ssize_t len, i;

    const char mark_op = MARK;
    const char tuple_op = TUPLE;
    const char pop_op = POP;
    const char pop_mark_op = POP_MARK;
    /* Indexed by tuple length (0..3). */
    const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};

    if ((len = PyTuple_Size(obj)) < 0)
        return -1;

    if (len == 0) {
        char pdata[2];

        /* Note: len is reused below as the number of bytes to write.
           The empty tuple is not memoized on this path. */
        if (self->proto) {
            pdata[0] = EMPTY_TUPLE;
            len = 1;
        }
        else {
            pdata[0] = MARK;
            pdata[1] = TUPLE;
            len = 2;
        }
        if (_Pickler_Write(self, pdata, len) < 0)
            return -1;
        return 0;
    }

    /* The tuple isn't in the memo now.  If it shows up there after
     * saving the tuple elements, the tuple must be recursive, in
     * which case we'll pop everything we put on the stack, and fetch
     * its value from the memo.
     */
    if (len <= 3 && self->proto >= 2) {
        /* Use TUPLE{1,2,3} opcodes. */
        if (store_tuple_elements(self, obj, len) < 0)
            return -1;

        if (PyMemoTable_Get(self->memo, obj)) {
            /* pop the len elements */
            for (i = 0; i < len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
            /* fetch from memo */
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else { /* Not recursive. */
            /* len is 1, 2 or 3 here, selecting TUPLE1/TUPLE2/TUPLE3. */
            if (_Pickler_Write(self, len2opcode + len, 1) < 0)
                return -1;
        }
        goto memoize;
    }

    /* proto < 2 and len > 0, or proto >= 2 and len > 3.
     * Generate MARK e1 e2 ... TUPLE
     */
    if (_Pickler_Write(self, &mark_op, 1) < 0)
        return -1;

    if (store_tuple_elements(self, obj, len) < 0)
        return -1;

    if (PyMemoTable_Get(self->memo, obj)) {
        /* pop the stack stuff we pushed */
        if (self->bin) {
            if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
                return -1;
        }
        else {
            /* Note that we pop one more than len, to remove
             * the MARK too.
             */
            for (i = 0; i <= len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
        }
        /* fetch from memo */
        if (memo_get(self, obj) < 0)
            return -1;

        return 0;
    }
    else { /* Not recursive. */
        if (_Pickler_Write(self, &tuple_op, 1) < 0)
            return -1;
    }

    /* Shared tail for both non-recursive paths. */
  memoize:
    if (memo_put(self, obj) < 0)
        return -1;

    return 0;
}
2872
/* iter is an iterator giving items, and we batch up chunks of
 *     MARK item item ... item APPENDS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty list, or list-like object, for the APPENDS to operate on.
 * Returns 0 on success, <0 on error.
 *
 * For protocol 0 each item is followed by its own APPEND instead.  A batch
 * that turns out to hold a single item is also written as item + APPEND,
 * without a MARK.
 */
static int
batch_list(PicklerObject *self, PyObject *iter)
{
    PyObject *obj = NULL;
    PyObject *firstitem = NULL;
    int i, n;

    const char mark_op = MARK;
    const char append_op = APPEND;
    const char appends_op = APPENDS;

    assert(iter != NULL);

    /* XXX: I think this function could be made faster by avoiding the
       iterator interface and fetching objects directly from list using
       PyList_GET_ITEM.
    */

    if (self->proto == 0) {
        /* APPENDS isn't available; do one at a time. */
        for (;;) {
            obj = PyIter_Next(iter);
            if (obj == NULL) {
                /* NULL means either exhaustion or an error. */
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            i = save(self, obj, 0);
            /* Release the item before checking the result, so it is not
               leaked on failure. */
            Py_DECREF(obj);
            if (i < 0)
                return -1;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    do {
        /* Get first item */
        firstitem = PyIter_Next(iter);
        if (firstitem == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* nothing more to add */
            break;
        }

        /* Try to get a second item */
        obj = PyIter_Next(iter);
        if (obj == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* Only one item to write */
            if (save(self, firstitem, 0) < 0)
                goto error;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                goto error;
            Py_CLEAR(firstitem);
            break;
        }

        /* More than one item to write */

        /* Pump out MARK, items, APPENDS. */
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            goto error;

        if (save(self, firstitem, 0) < 0)
            goto error;
        Py_CLEAR(firstitem);
        n = 1;

        /* Fetch and save up to BATCHSIZE items */
        while (obj) {
            if (save(self, obj, 0) < 0)
                goto error;
            Py_CLEAR(obj);
            n += 1;

            if (n == BATCHSIZE)
                break;

            obj = PyIter_Next(iter);
            if (obj == NULL) {
                if (PyErr_Occurred())
                    goto error;
                break;
            }
        }

        if (_Pickler_Write(self, &appends_op, 1) < 0)
            goto error;

    /* A full batch means there may be more items; a short one means the
       iterator is exhausted. */
    } while (n == BATCHSIZE);
    return 0;

    /* firstitem and obj are NULL unless still owned at the point of
       failure. */
  error:
    Py_XDECREF(firstitem);
    Py_XDECREF(obj);
    return -1;
}
2983
2984 /* This is a variant of batch_list() above, specialized for lists (with no
2985 * support for list subclasses). Like batch_list(), we batch up chunks of
2986 * MARK item item ... item APPENDS
2987 * opcode sequences. Calling code should have arranged to first create an
2988 * empty list, or list-like object, for the APPENDS to operate on.
2989 * Returns 0 on success, -1 on error.
2990 *
2991 * This version is considerably faster than batch_list(), if less general.
2992 *
2993 * Note that this only works for protocols > 0.
2994 */
2995 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2996 batch_list_exact(PicklerObject *self, PyObject *obj)
2997 {
2998 PyObject *item = NULL;
2999 Py_ssize_t this_batch, total;
3000
3001 const char append_op = APPEND;
3002 const char appends_op = APPENDS;
3003 const char mark_op = MARK;
3004
3005 assert(obj != NULL);
3006 assert(self->proto > 0);
3007 assert(PyList_CheckExact(obj));
3008
3009 if (PyList_GET_SIZE(obj) == 1) {
3010 item = PyList_GET_ITEM(obj, 0);
3011 if (save(self, item, 0) < 0)
3012 return -1;
3013 if (_Pickler_Write(self, &append_op, 1) < 0)
3014 return -1;
3015 return 0;
3016 }
3017
3018 /* Write in batches of BATCHSIZE. */
3019 total = 0;
3020 do {
3021 this_batch = 0;
3022 if (_Pickler_Write(self, &mark_op, 1) < 0)
3023 return -1;
3024 while (total < PyList_GET_SIZE(obj)) {
3025 item = PyList_GET_ITEM(obj, total);
3026 if (save(self, item, 0) < 0)
3027 return -1;
3028 total++;
3029 if (++this_batch == BATCHSIZE)
3030 break;
3031 }
3032 if (_Pickler_Write(self, &appends_op, 1) < 0)
3033 return -1;
3034
3035 } while (total < PyList_GET_SIZE(obj));
3036
3037 return 0;
3038 }
3039
3040 static int
save_list(PicklerObject * self,PyObject * obj)3041 save_list(PicklerObject *self, PyObject *obj)
3042 {
3043 char header[3];
3044 Py_ssize_t len;
3045 int status = 0;
3046
3047 if (self->fast && !fast_save_enter(self, obj))
3048 goto error;
3049
3050 /* Create an empty list. */
3051 if (self->bin) {
3052 header[0] = EMPTY_LIST;
3053 len = 1;
3054 }
3055 else {
3056 header[0] = MARK;
3057 header[1] = LIST;
3058 len = 2;
3059 }
3060
3061 if (_Pickler_Write(self, header, len) < 0)
3062 goto error;
3063
3064 /* Get list length, and bow out early if empty. */
3065 if ((len = PyList_Size(obj)) < 0)
3066 goto error;
3067
3068 if (memo_put(self, obj) < 0)
3069 goto error;
3070
3071 if (len != 0) {
3072 /* Materialize the list elements. */
3073 if (PyList_CheckExact(obj) && self->proto > 0) {
3074 if (Py_EnterRecursiveCall(" while pickling an object"))
3075 goto error;
3076 status = batch_list_exact(self, obj);
3077 Py_LeaveRecursiveCall();
3078 } else {
3079 PyObject *iter = PyObject_GetIter(obj);
3080 if (iter == NULL)
3081 goto error;
3082
3083 if (Py_EnterRecursiveCall(" while pickling an object")) {
3084 Py_DECREF(iter);
3085 goto error;
3086 }
3087 status = batch_list(self, iter);
3088 Py_LeaveRecursiveCall();
3089 Py_DECREF(iter);
3090 }
3091 }
3092 if (0) {
3093 error:
3094 status = -1;
3095 }
3096
3097 if (self->fast && !fast_save_leave(self, obj))
3098 status = -1;
3099
3100 return status;
3101 }
3102
3103 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3104 * MARK key value ... key value SETITEMS
3105 * opcode sequences. Calling code should have arranged to first create an
3106 * empty dict, or dict-like object, for the SETITEMS to operate on.
3107 * Returns 0 on success, <0 on error.
3108 *
3109 * This is very much like batch_list(). The difference between saving
3110 * elements directly, and picking apart two-tuples, is so long-winded at
3111 * the C level, though, that attempts to combine these routines were too
3112 * ugly to bear.
3113 */
3114 static int
batch_dict(PicklerObject * self,PyObject * iter)3115 batch_dict(PicklerObject *self, PyObject *iter)
3116 {
3117 PyObject *obj = NULL;
3118 PyObject *firstitem = NULL;
3119 int i, n;
3120
3121 const char mark_op = MARK;
3122 const char setitem_op = SETITEM;
3123 const char setitems_op = SETITEMS;
3124
3125 assert(iter != NULL);
3126
3127 if (self->proto == 0) {
3128 /* SETITEMS isn't available; do one at a time. */
3129 for (;;) {
3130 obj = PyIter_Next(iter);
3131 if (obj == NULL) {
3132 if (PyErr_Occurred())
3133 return -1;
3134 break;
3135 }
3136 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3137 PyErr_SetString(PyExc_TypeError, "dict items "
3138 "iterator must return 2-tuples");
3139 return -1;
3140 }
3141 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3142 if (i >= 0)
3143 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3144 Py_DECREF(obj);
3145 if (i < 0)
3146 return -1;
3147 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3148 return -1;
3149 }
3150 return 0;
3151 }
3152
3153 /* proto > 0: write in batches of BATCHSIZE. */
3154 do {
3155 /* Get first item */
3156 firstitem = PyIter_Next(iter);
3157 if (firstitem == NULL) {
3158 if (PyErr_Occurred())
3159 goto error;
3160
3161 /* nothing more to add */
3162 break;
3163 }
3164 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3165 PyErr_SetString(PyExc_TypeError, "dict items "
3166 "iterator must return 2-tuples");
3167 goto error;
3168 }
3169
3170 /* Try to get a second item */
3171 obj = PyIter_Next(iter);
3172 if (obj == NULL) {
3173 if (PyErr_Occurred())
3174 goto error;
3175
3176 /* Only one item to write */
3177 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3178 goto error;
3179 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3180 goto error;
3181 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3182 goto error;
3183 Py_CLEAR(firstitem);
3184 break;
3185 }
3186
3187 /* More than one item to write */
3188
3189 /* Pump out MARK, items, SETITEMS. */
3190 if (_Pickler_Write(self, &mark_op, 1) < 0)
3191 goto error;
3192
3193 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3194 goto error;
3195 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3196 goto error;
3197 Py_CLEAR(firstitem);
3198 n = 1;
3199
3200 /* Fetch and save up to BATCHSIZE items */
3201 while (obj) {
3202 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3203 PyErr_SetString(PyExc_TypeError, "dict items "
3204 "iterator must return 2-tuples");
3205 goto error;
3206 }
3207 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3208 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3209 goto error;
3210 Py_CLEAR(obj);
3211 n += 1;
3212
3213 if (n == BATCHSIZE)
3214 break;
3215
3216 obj = PyIter_Next(iter);
3217 if (obj == NULL) {
3218 if (PyErr_Occurred())
3219 goto error;
3220 break;
3221 }
3222 }
3223
3224 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3225 goto error;
3226
3227 } while (n == BATCHSIZE);
3228 return 0;
3229
3230 error:
3231 Py_XDECREF(firstitem);
3232 Py_XDECREF(obj);
3233 return -1;
3234 }
3235
3236 /* This is a variant of batch_dict() above that specializes for dicts, with no
3237 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3238 * MARK key value ... key value SETITEMS
3239 * opcode sequences. Calling code should have arranged to first create an
3240 * empty dict, or dict-like object, for the SETITEMS to operate on.
3241 * Returns 0 on success, -1 on error.
3242 *
3243 * Note that this currently doesn't work for protocol 0.
3244 */
3245 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3246 batch_dict_exact(PicklerObject *self, PyObject *obj)
3247 {
3248 PyObject *key = NULL, *value = NULL;
3249 int i;
3250 Py_ssize_t dict_size, ppos = 0;
3251
3252 const char mark_op = MARK;
3253 const char setitem_op = SETITEM;
3254 const char setitems_op = SETITEMS;
3255
3256 assert(obj != NULL && PyDict_CheckExact(obj));
3257 assert(self->proto > 0);
3258
3259 dict_size = PyDict_GET_SIZE(obj);
3260
3261 /* Special-case len(d) == 1 to save space. */
3262 if (dict_size == 1) {
3263 PyDict_Next(obj, &ppos, &key, &value);
3264 if (save(self, key, 0) < 0)
3265 return -1;
3266 if (save(self, value, 0) < 0)
3267 return -1;
3268 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3269 return -1;
3270 return 0;
3271 }
3272
3273 /* Write in batches of BATCHSIZE. */
3274 do {
3275 i = 0;
3276 if (_Pickler_Write(self, &mark_op, 1) < 0)
3277 return -1;
3278 while (PyDict_Next(obj, &ppos, &key, &value)) {
3279 if (save(self, key, 0) < 0)
3280 return -1;
3281 if (save(self, value, 0) < 0)
3282 return -1;
3283 if (++i == BATCHSIZE)
3284 break;
3285 }
3286 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3287 return -1;
3288 if (PyDict_GET_SIZE(obj) != dict_size) {
3289 PyErr_Format(
3290 PyExc_RuntimeError,
3291 "dictionary changed size during iteration");
3292 return -1;
3293 }
3294
3295 } while (i == BATCHSIZE);
3296 return 0;
3297 }
3298
3299 static int
save_dict(PicklerObject * self,PyObject * obj)3300 save_dict(PicklerObject *self, PyObject *obj)
3301 {
3302 PyObject *items, *iter;
3303 char header[3];
3304 Py_ssize_t len;
3305 int status = 0;
3306 assert(PyDict_Check(obj));
3307
3308 if (self->fast && !fast_save_enter(self, obj))
3309 goto error;
3310
3311 /* Create an empty dict. */
3312 if (self->bin) {
3313 header[0] = EMPTY_DICT;
3314 len = 1;
3315 }
3316 else {
3317 header[0] = MARK;
3318 header[1] = DICT;
3319 len = 2;
3320 }
3321
3322 if (_Pickler_Write(self, header, len) < 0)
3323 goto error;
3324
3325 if (memo_put(self, obj) < 0)
3326 goto error;
3327
3328 if (PyDict_GET_SIZE(obj)) {
3329 /* Save the dict items. */
3330 if (PyDict_CheckExact(obj) && self->proto > 0) {
3331 /* We can take certain shortcuts if we know this is a dict and
3332 not a dict subclass. */
3333 if (Py_EnterRecursiveCall(" while pickling an object"))
3334 goto error;
3335 status = batch_dict_exact(self, obj);
3336 Py_LeaveRecursiveCall();
3337 } else {
3338 _Py_IDENTIFIER(items);
3339
3340 items = _PyObject_CallMethodId(obj, &PyId_items, NULL);
3341 if (items == NULL)
3342 goto error;
3343 iter = PyObject_GetIter(items);
3344 Py_DECREF(items);
3345 if (iter == NULL)
3346 goto error;
3347 if (Py_EnterRecursiveCall(" while pickling an object")) {
3348 Py_DECREF(iter);
3349 goto error;
3350 }
3351 status = batch_dict(self, iter);
3352 Py_LeaveRecursiveCall();
3353 Py_DECREF(iter);
3354 }
3355 }
3356
3357 if (0) {
3358 error:
3359 status = -1;
3360 }
3361
3362 if (self->fast && !fast_save_leave(self, obj))
3363 status = -1;
3364
3365 return status;
3366 }
3367
3368 static int
save_set(PicklerObject * self,PyObject * obj)3369 save_set(PicklerObject *self, PyObject *obj)
3370 {
3371 PyObject *item;
3372 int i;
3373 Py_ssize_t set_size, ppos = 0;
3374 Py_hash_t hash;
3375
3376 const char empty_set_op = EMPTY_SET;
3377 const char mark_op = MARK;
3378 const char additems_op = ADDITEMS;
3379
3380 if (self->proto < 4) {
3381 PyObject *items;
3382 PyObject *reduce_value;
3383 int status;
3384
3385 items = PySequence_List(obj);
3386 if (items == NULL) {
3387 return -1;
3388 }
3389 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3390 Py_DECREF(items);
3391 if (reduce_value == NULL) {
3392 return -1;
3393 }
3394 /* save_reduce() will memoize the object automatically. */
3395 status = save_reduce(self, reduce_value, obj);
3396 Py_DECREF(reduce_value);
3397 return status;
3398 }
3399
3400 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3401 return -1;
3402
3403 if (memo_put(self, obj) < 0)
3404 return -1;
3405
3406 set_size = PySet_GET_SIZE(obj);
3407 if (set_size == 0)
3408 return 0; /* nothing to do */
3409
3410 /* Write in batches of BATCHSIZE. */
3411 do {
3412 i = 0;
3413 if (_Pickler_Write(self, &mark_op, 1) < 0)
3414 return -1;
3415 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3416 if (save(self, item, 0) < 0)
3417 return -1;
3418 if (++i == BATCHSIZE)
3419 break;
3420 }
3421 if (_Pickler_Write(self, &additems_op, 1) < 0)
3422 return -1;
3423 if (PySet_GET_SIZE(obj) != set_size) {
3424 PyErr_Format(
3425 PyExc_RuntimeError,
3426 "set changed size during iteration");
3427 return -1;
3428 }
3429 } while (i == BATCHSIZE);
3430
3431 return 0;
3432 }
3433
3434 static int
save_frozenset(PicklerObject * self,PyObject * obj)3435 save_frozenset(PicklerObject *self, PyObject *obj)
3436 {
3437 PyObject *iter;
3438
3439 const char mark_op = MARK;
3440 const char frozenset_op = FROZENSET;
3441
3442 if (self->fast && !fast_save_enter(self, obj))
3443 return -1;
3444
3445 if (self->proto < 4) {
3446 PyObject *items;
3447 PyObject *reduce_value;
3448 int status;
3449
3450 items = PySequence_List(obj);
3451 if (items == NULL) {
3452 return -1;
3453 }
3454 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3455 items);
3456 Py_DECREF(items);
3457 if (reduce_value == NULL) {
3458 return -1;
3459 }
3460 /* save_reduce() will memoize the object automatically. */
3461 status = save_reduce(self, reduce_value, obj);
3462 Py_DECREF(reduce_value);
3463 return status;
3464 }
3465
3466 if (_Pickler_Write(self, &mark_op, 1) < 0)
3467 return -1;
3468
3469 iter = PyObject_GetIter(obj);
3470 if (iter == NULL) {
3471 return -1;
3472 }
3473 for (;;) {
3474 PyObject *item;
3475
3476 item = PyIter_Next(iter);
3477 if (item == NULL) {
3478 if (PyErr_Occurred()) {
3479 Py_DECREF(iter);
3480 return -1;
3481 }
3482 break;
3483 }
3484 if (save(self, item, 0) < 0) {
3485 Py_DECREF(item);
3486 Py_DECREF(iter);
3487 return -1;
3488 }
3489 Py_DECREF(item);
3490 }
3491 Py_DECREF(iter);
3492
3493 /* If the object is already in the memo, this means it is
3494 recursive. In this case, throw away everything we put on the
3495 stack, and fetch the object back from the memo. */
3496 if (PyMemoTable_Get(self->memo, obj)) {
3497 const char pop_mark_op = POP_MARK;
3498
3499 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3500 return -1;
3501 if (memo_get(self, obj) < 0)
3502 return -1;
3503 return 0;
3504 }
3505
3506 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3507 return -1;
3508 if (memo_put(self, obj) < 0)
3509 return -1;
3510
3511 return 0;
3512 }
3513
3514 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3515 fix_imports(PyObject **module_name, PyObject **global_name)
3516 {
3517 PyObject *key;
3518 PyObject *item;
3519 PickleState *st = _Pickle_GetGlobalState();
3520
3521 key = PyTuple_Pack(2, *module_name, *global_name);
3522 if (key == NULL)
3523 return -1;
3524 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3525 Py_DECREF(key);
3526 if (item) {
3527 PyObject *fixed_module_name;
3528 PyObject *fixed_global_name;
3529
3530 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3531 PyErr_Format(PyExc_RuntimeError,
3532 "_compat_pickle.REVERSE_NAME_MAPPING values "
3533 "should be 2-tuples, not %.200s",
3534 Py_TYPE(item)->tp_name);
3535 return -1;
3536 }
3537 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3538 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3539 if (!PyUnicode_Check(fixed_module_name) ||
3540 !PyUnicode_Check(fixed_global_name)) {
3541 PyErr_Format(PyExc_RuntimeError,
3542 "_compat_pickle.REVERSE_NAME_MAPPING values "
3543 "should be pairs of str, not (%.200s, %.200s)",
3544 Py_TYPE(fixed_module_name)->tp_name,
3545 Py_TYPE(fixed_global_name)->tp_name);
3546 return -1;
3547 }
3548
3549 Py_CLEAR(*module_name);
3550 Py_CLEAR(*global_name);
3551 Py_INCREF(fixed_module_name);
3552 Py_INCREF(fixed_global_name);
3553 *module_name = fixed_module_name;
3554 *global_name = fixed_global_name;
3555 return 0;
3556 }
3557 else if (PyErr_Occurred()) {
3558 return -1;
3559 }
3560
3561 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3562 if (item) {
3563 if (!PyUnicode_Check(item)) {
3564 PyErr_Format(PyExc_RuntimeError,
3565 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3566 "should be strings, not %.200s",
3567 Py_TYPE(item)->tp_name);
3568 return -1;
3569 }
3570 Py_INCREF(item);
3571 Py_XSETREF(*module_name, item);
3572 }
3573 else if (PyErr_Occurred()) {
3574 return -1;
3575 }
3576
3577 return 0;
3578 }
3579
/* Pickle obj by reference, i.e. as a module name plus a (possibly dotted)
 * global name rather than by value.
 *
 * If name is non-NULL it is used as the global name; otherwise the object's
 * __qualname__ is used, falling back to __name__.  The module is imported and
 * the name is looked up in it; pickling fails with PicklingError unless that
 * lookup yields obj itself.
 *
 * Output, in order of preference:
 *   - protocol >= 2 and the (module, name) pair is in the extension
 *     registry: an EXT1/EXT2/EXT4 opcode (not memoized);
 *   - protocol >= 4: both names are saved and STACK_GLOBAL is written;
 *   - the attribute's parent is not the module itself: a reduce of
 *     getattr(parent, lastname);
 *   - otherwise: a textual GLOBAL opcode.
 * In the last three cases obj is memoized afterwards.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_global(PicklerObject *self, PyObject *obj, PyObject *name)
{
    PyObject *global_name = NULL;
    PyObject *module_name = NULL;
    PyObject *module = NULL;
    PyObject *parent = NULL;
    PyObject *dotted_path = NULL;
    PyObject *lastname = NULL;
    PyObject *cls;
    PickleState *st = _Pickle_GetGlobalState();
    int status = 0;
    _Py_IDENTIFIER(__name__);
    _Py_IDENTIFIER(__qualname__);

    const char global_op = GLOBAL;

    if (name) {
        Py_INCREF(name);
        global_name = name;
    }
    else {
        if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
            goto error;
        if (global_name == NULL) {
            global_name = _PyObject_GetAttrId(obj, &PyId___name__);
            if (global_name == NULL)
                goto error;
        }
    }

    /* NOTE: module is still NULL at this point; it is only assigned by the
       PyImport_Import() call further down. */
    dotted_path = get_dotted_path(module, global_name);
    if (dotted_path == NULL)
        goto error;
    module_name = whichmodule(obj, dotted_path);
    if (module_name == NULL)
        goto error;

    /* XXX: Change to use the import C API directly with level=0 to disallow
       relative imports.

       XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
       builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
       custom import functions (IMHO, this would be a nice security
       feature). The import C API would need to be extended to support the
       extra parameters of __import__ to fix that. */
    module = PyImport_Import(module_name);
    if (module == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: import of module %R failed",
                     obj, module_name);
        goto error;
    }
    /* Keep our own reference to the last path component: dotted_path is
       released right after the attribute lookup. */
    lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
    Py_INCREF(lastname);
    cls = get_deep_attribute(module, dotted_path, &parent);
    Py_CLEAR(dotted_path);
    if (cls == NULL) {
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: attribute lookup %S on %S failed",
                     obj, global_name, module_name);
        goto error;
    }
    if (cls != obj) {
        Py_DECREF(cls);
        PyErr_Format(st->PicklingError,
                     "Can't pickle %R: it's not the same object as %S.%S",
                     obj, module_name, global_name);
        goto error;
    }
    Py_DECREF(cls);

    if (self->proto >= 2) {
        /* See whether this is in the extension registry, and if
         * so generate an EXT opcode.
         */
        PyObject *extension_key;
        PyObject *code_obj;     /* extension code as Python object */
        long code;              /* extension code as C value */
        char pdata[5];
        Py_ssize_t n;

        extension_key = PyTuple_Pack(2, module_name, global_name);
        if (extension_key == NULL) {
            goto error;
        }
        code_obj = PyDict_GetItemWithError(st->extension_registry,
                                           extension_key);
        Py_DECREF(extension_key);
        /* The object is not registered in the extension registry.
           This is the most likely code path. */
        if (code_obj == NULL) {
            if (PyErr_Occurred()) {
                goto error;
            }
            goto gen_global;
        }

        /* XXX: pickle.py checks neither the type nor the range
           of the value returned by the extension_registry. It should for
           consistency. */

        /* Verify code_obj has the right type and value. */
        if (!PyLong_Check(code_obj)) {
            PyErr_Format(st->PicklingError,
                         "Can't pickle %R: extension code %R isn't an integer",
                         obj, code_obj);
            goto error;
        }
        code = PyLong_AS_LONG(code_obj);
        if (code <= 0 || code > 0x7fffffffL) {
            /* Only raise PicklingError if the conversion above did not
               already leave an exception set. */
            if (!PyErr_Occurred())
                PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
                             "code %ld is out of range", obj, code);
            goto error;
        }

        /* Generate an EXT opcode; the code is written little-endian in the
           smallest of 1, 2 or 4 bytes that holds it. */
        if (code <= 0xff) {
            pdata[0] = EXT1;
            pdata[1] = (unsigned char)code;
            n = 2;
        }
        else if (code <= 0xffff) {
            pdata[0] = EXT2;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            n = 3;
        }
        else {
            pdata[0] = EXT4;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            pdata[3] = (unsigned char)((code >> 16) & 0xff);
            pdata[4] = (unsigned char)((code >> 24) & 0xff);
            n = 5;
        }

        if (_Pickler_Write(self, pdata, n) < 0)
            goto error;
    }
    else {
        /* Also reached by a goto from the protocol >= 2 branch above when
           the object is not in the extension registry. */
  gen_global:
        if (parent == module) {
            /* Top-level attribute of the module: use the plain last
               component as the global name. */
            Py_INCREF(lastname);
            Py_DECREF(global_name);
            global_name = lastname;
        }
        if (self->proto >= 4) {
            const char stack_global_op = STACK_GLOBAL;

            if (save(self, module_name, 0) < 0)
                goto error;
            if (save(self, global_name, 0) < 0)
                goto error;

            if (_Pickler_Write(self, &stack_global_op, 1) < 0)
                goto error;
        }
        else if (parent != module) {
            /* Nested attribute under a protocol < 4: pickle it as
               getattr(parent, lastname).  Note this st shadows the
               function-level st. */
            PickleState *st = _Pickle_GetGlobalState();
            PyObject *reduce_value = Py_BuildValue("(O(OO))",
                                        st->getattr, parent, lastname);
            if (reduce_value == NULL)
                goto error;
            status = save_reduce(self, reduce_value, NULL);
            Py_DECREF(reduce_value);
            if (status < 0)
                goto error;
        }
        else {
            /* Generate a normal global opcode if we are using a pickle
               protocol < 4, or if the object is not registered in the
               extension registry. */
            PyObject *encoded;
            PyObject *(*unicode_encoder)(PyObject *);

            if (_Pickler_Write(self, &global_op, 1) < 0)
                goto error;

            /* For protocol < 3 and if the user didn't request against doing
               so, we convert module names to the old 2.x module names. */
            if (self->proto < 3 && self->fix_imports) {
                if (fix_imports(&module_name, &global_name) < 0) {
                    goto error;
                }
            }

            /* Since Python 3.0 now supports non-ASCII identifiers, we encode
               both the module name and the global name using UTF-8. We do so
               only when we are using pickle protocol 3; for older protocols
               the names are encoded as ASCII. This is to ensure compatibility
               with older Unpickler running on Python 2.x. */
            if (self->proto == 3) {
                unicode_encoder = PyUnicode_AsUTF8String;
            }
            else {
                unicode_encoder = PyUnicode_AsASCIIString;
            }
            encoded = unicode_encoder(module_name);
            if (encoded == NULL) {
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle module identifier '%S' using "
                                 "pickle protocol %i",
                                 module_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if(_Pickler_Write(self, "\n", 1) < 0)
                goto error;

            /* Save the name of the global. */
            encoded = unicode_encoder(global_name);
            if (encoded == NULL) {
                if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                    PyErr_Format(st->PicklingError,
                                 "can't pickle global identifier '%S' using "
                                 "pickle protocol %i",
                                 global_name, self->proto);
                goto error;
            }
            if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
                               PyBytes_GET_SIZE(encoded)) < 0) {
                Py_DECREF(encoded);
                goto error;
            }
            Py_DECREF(encoded);
            if (_Pickler_Write(self, "\n", 1) < 0)
                goto error;
        }
        /* Memoize the object. */
        if (memo_put(self, obj) < 0)
            goto error;
    }

    /* Success falls through past the error label; failures jump into it.
       Both share the cleanup below. */
    if (0) {
  error:
        status = -1;
    }
    Py_XDECREF(module_name);
    Py_XDECREF(global_name);
    Py_XDECREF(module);
    Py_XDECREF(parent);
    Py_XDECREF(dotted_path);
    Py_XDECREF(lastname);

    return status;
}
3834
3835 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3836 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3837 {
3838 PyObject *reduce_value;
3839 int status;
3840
3841 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3842 if (reduce_value == NULL) {
3843 return -1;
3844 }
3845 status = save_reduce(self, reduce_value, obj);
3846 Py_DECREF(reduce_value);
3847 return status;
3848 }
3849
3850 static int
save_type(PicklerObject * self,PyObject * obj)3851 save_type(PicklerObject *self, PyObject *obj)
3852 {
3853 if (obj == (PyObject *)&_PyNone_Type) {
3854 return save_singleton_type(self, obj, Py_None);
3855 }
3856 else if (obj == (PyObject *)&PyEllipsis_Type) {
3857 return save_singleton_type(self, obj, Py_Ellipsis);
3858 }
3859 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3860 return save_singleton_type(self, obj, Py_NotImplemented);
3861 }
3862 return save_global(self, obj, NULL);
3863 }
3864
3865 static int
save_pers(PicklerObject * self,PyObject * obj)3866 save_pers(PicklerObject *self, PyObject *obj)
3867 {
3868 PyObject *pid = NULL;
3869 int status = 0;
3870
3871 const char persid_op = PERSID;
3872 const char binpersid_op = BINPERSID;
3873
3874 pid = call_method(self->pers_func, self->pers_func_self, obj);
3875 if (pid == NULL)
3876 return -1;
3877
3878 if (pid != Py_None) {
3879 if (self->bin) {
3880 if (save(self, pid, 1) < 0 ||
3881 _Pickler_Write(self, &binpersid_op, 1) < 0)
3882 goto error;
3883 }
3884 else {
3885 PyObject *pid_str;
3886
3887 pid_str = PyObject_Str(pid);
3888 if (pid_str == NULL)
3889 goto error;
3890
3891 /* XXX: Should it check whether the pid contains embedded
3892 newlines? */
3893 if (!PyUnicode_IS_ASCII(pid_str)) {
3894 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3895 "persistent IDs in protocol 0 must be "
3896 "ASCII strings");
3897 Py_DECREF(pid_str);
3898 goto error;
3899 }
3900
3901 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3902 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3903 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3904 _Pickler_Write(self, "\n", 1) < 0) {
3905 Py_DECREF(pid_str);
3906 goto error;
3907 }
3908 Py_DECREF(pid_str);
3909 }
3910 status = 1;
3911 }
3912
3913 if (0) {
3914 error:
3915 status = -1;
3916 }
3917 Py_XDECREF(pid);
3918
3919 return status;
3920 }
3921
3922 static PyObject *
get_class(PyObject * obj)3923 get_class(PyObject *obj)
3924 {
3925 PyObject *cls;
3926 _Py_IDENTIFIER(__class__);
3927
3928 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3929 cls = (PyObject *) Py_TYPE(obj);
3930 Py_INCREF(cls);
3931 }
3932 return cls;
3933 }
3934
/* We're saving obj, and args is the 2-thru-6 tuple returned by the
 * appropriate __reduce__ method for obj:
 *     (callable, argtup[, state[, listitems[, dictitems[, state_setter]]]])
 * Any of the optional items may be None, which is treated as absent.
 *
 * The callable and its arguments are written first (as NEWOBJ_EX, NEWOBJ or
 * REDUCE depending on the protocol and on the callable's __name__), then obj
 * is memoized (unless obj is NULL), then listitems, dictitems and state are
 * written in that order.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    PyObject *state_setter = Py_None;
    PickleState *st = _Pickle_GetGlobalState();
    Py_ssize_t size;
    int use_newobj = 0, use_newobj_ex = 0;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;
    const char newobj_ex_op = NEWOBJ_EX;

    size = PyTuple_Size(args);
    if (size < 2 || size > 6) {
        PyErr_SetString(st->PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 6 elements");
        return -1;
    }

    /* The unpacked items are borrowed from args. */
    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
                           &callable, &argtup, &state, &listitems, &dictitems,
                           &state_setter))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(st->PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(st->PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* From here on, NULL uniformly means "not supplied" for the optional
       items. */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(st->PicklingError, "fourth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(st->PicklingError, "fifth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    if (state_setter == Py_None)
        state_setter = NULL;
    else if (!PyCallable_Check(state_setter)) {
        PyErr_Format(st->PicklingError, "sixth element of the tuple "
                     "returned by __reduce__ must be a function, not %s",
                     Py_TYPE(state_setter)->tp_name);
        return -1;
    }

    /* For protocol >= 2, a callable whose __name__ is "__newobj_ex__" or
       "__newobj__" selects the NEWOBJ_EX / NEWOBJ code paths below. */
    if (self->proto >= 2) {
        PyObject *name;
        _Py_IDENTIFIER(__name__);

        if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
            return -1;
        }
        if (name != NULL && PyUnicode_Check(name)) {
            _Py_IDENTIFIER(__newobj_ex__);
            use_newobj_ex = _PyUnicode_EqualToASCIIId(
                    name, &PyId___newobj_ex__);
            if (!use_newobj_ex) {
                _Py_IDENTIFIER(__newobj__);
                use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
            }
        }
        Py_XDECREF(name);
    }

    if (use_newobj_ex) {
        PyObject *cls;
        PyObject *args;     /* NOTE: shadows the args parameter */
        PyObject *kwargs;

        if (PyTuple_GET_SIZE(argtup) != 3) {
            PyErr_Format(st->PicklingError,
                         "length of the NEWOBJ_EX argument tuple must be "
                         "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_Format(st->PicklingError,
                         "first item from NEWOBJ_EX argument tuple must "
                         "be a class, not %.200s", Py_TYPE(cls)->tp_name);
            return -1;
        }
        args = PyTuple_GET_ITEM(argtup, 1);
        if (!PyTuple_Check(args)) {
            PyErr_Format(st->PicklingError,
                         "second item from NEWOBJ_EX argument tuple must "
                         "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
            return -1;
        }
        kwargs = PyTuple_GET_ITEM(argtup, 2);
        if (!PyDict_Check(kwargs)) {
            PyErr_Format(st->PicklingError,
                         "third item from NEWOBJ_EX argument tuple must "
                         "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
            return -1;
        }

        if (self->proto >= 4) {
            if (save(self, cls, 0) < 0 ||
                save(self, args, 0) < 0 ||
                save(self, kwargs, 0) < 0 ||
                _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
                return -1;
            }
        }
        else {
            /* Protocols 2 and 3: instead of NEWOBJ_EX, pickle a REDUCE of
               st->partial(cls.__new__, cls, *args, **kwargs) called with
               no arguments. */
            PyObject *newargs;
            PyObject *cls_new;
            Py_ssize_t i;
            _Py_IDENTIFIER(__new__);

            newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
            if (newargs == NULL)
                return -1;

            cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
            if (cls_new == NULL) {
                Py_DECREF(newargs);
                return -1;
            }
            /* PyTuple_SET_ITEM takes over the reference to cls_new. */
            PyTuple_SET_ITEM(newargs, 0, cls_new);
            Py_INCREF(cls);
            PyTuple_SET_ITEM(newargs, 1, cls);
            for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
                PyObject *item = PyTuple_GET_ITEM(args, i);
                Py_INCREF(item);
                PyTuple_SET_ITEM(newargs, i + 2, item);
            }

            /* callable is now an owned reference (it was borrowed from the
               reduce tuple before) and is released below. */
            callable = PyObject_Call(st->partial, newargs, kwargs);
            Py_DECREF(newargs);
            if (callable == NULL)
                return -1;

            newargs = PyTuple_New(0);
            if (newargs == NULL) {
                Py_DECREF(callable);
                return -1;
            }

            if (save(self, callable, 0) < 0 ||
                save(self, newargs, 0) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0) {
                Py_DECREF(newargs);
                Py_DECREF(callable);
                return -1;
            }
            Py_DECREF(newargs);
            Py_DECREF(callable);
        }
    }
    else if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (PyTuple_GET_SIZE(argtup) < 1) {
            PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_SetString(st->PicklingError, "args[0] from "
                            "__newobj__ args is not a type");
            return -1;
        }

        if (obj != NULL) {
            obj_class = get_class(obj);
            if (obj_class == NULL) {
                return -1;
            }
            /* Only the pointer comparison is needed, so the reference can
               be dropped right away. */
            p = obj_class != cls;
            Py_DECREF(obj_class);
            if (p) {
                PyErr_SetString(st->PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls to save() are prone to infinite recursion.
           Imagine what happens if the value returned by the __reduce__()
           method of some extension type contains another object of the same
           type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RecursionError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargtup (the 2nd item) recursion
           ensued. Of course, the bug was in the complex class which had a
           broken __getnewargs__() that emitted another complex object. But,
           the point, here, is it is quite easy to end up with a broken reduce
           function. */

        /* Save the class and its __new__ arguments. */
        if (save(self, cls, 0) < 0)
            return -1;

        newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(self, callable, 0) < 0 ||
            save(self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller does not want to memoize the object. Not particularly
       useful, but that is to mimic the behavior of save_reduce() in pickle.py
       when obj is None. */
    if (obj != NULL) {
        /* If the object is already in the memo, this means it is
           recursive. In this case, throw away everything we put on the
           stack, and fetch the object back from the memo. */
        if (PyMemoTable_Get(self->memo, obj)) {
            const char pop_op = POP;

            if (_Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else if (memo_put(self, obj) < 0)
            return -1;
    }

    if (listitems && batch_list(self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(self, dictitems) < 0)
        return -1;

    if (state) {
        if (state_setter == NULL) {
            if (save(self, state, 0) < 0 ||
                _Pickler_Write(self, &build_op, 1) < 0)
                return -1;
        }
        else {

            /* If a state_setter is specified, call it instead of load_build
             * to update obj with its previous state.
             * The first 4 save/write instructions push state_setter and its
             * tuple of expected arguments (obj, state) onto the stack. The
             * REDUCE opcode triggers the state_setter(obj, state) function
             * call. Finally, because state-updating routines only do in-place
             * modification, the whole operation has to be stack-transparent.
             * Thus, we finally pop the call's output from the stack.
             *
             * NOTE(review): obj is passed to save() here without a NULL
             * check; this presumably relies on callers never combining a
             * NULL obj with a state_setter -- confirm. */

            const char tupletwo_op = TUPLE2;
            const char pop_op = POP;
            if (save(self, state_setter, 0) < 0 ||
                save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
                _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0 ||
                _Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
        }
    }
    return 0;
}
4256
4257 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4258 save(PicklerObject *self, PyObject *obj, int pers_save)
4259 {
4260 PyTypeObject *type;
4261 PyObject *reduce_func = NULL;
4262 PyObject *reduce_value = NULL;
4263 int status = 0;
4264
4265 if (_Pickler_OpcodeBoundary(self) < 0)
4266 return -1;
4267
4268 /* The extra pers_save argument is necessary to avoid calling save_pers()
4269 on its returned object. */
4270 if (!pers_save && self->pers_func) {
4271 /* save_pers() returns:
4272 -1 to signal an error;
4273 0 if it did nothing successfully;
4274 1 if a persistent id was saved.
4275 */
4276 if ((status = save_pers(self, obj)) != 0)
4277 return status;
4278 }
4279
4280 type = Py_TYPE(obj);
4281
4282 /* The old cPickle had an optimization that used switch-case statement
4283 dispatching on the first letter of the type name. This has was removed
4284 since benchmarks shown that this optimization was actually slowing
4285 things down. */
4286
4287 /* Atom types; these aren't memoized, so don't check the memo. */
4288
4289 if (obj == Py_None) {
4290 return save_none(self, obj);
4291 }
4292 else if (obj == Py_False || obj == Py_True) {
4293 return save_bool(self, obj);
4294 }
4295 else if (type == &PyLong_Type) {
4296 return save_long(self, obj);
4297 }
4298 else if (type == &PyFloat_Type) {
4299 return save_float(self, obj);
4300 }
4301
4302 /* Check the memo to see if it has the object. If so, generate
4303 a GET (or BINGET) opcode, instead of pickling the object
4304 once again. */
4305 if (PyMemoTable_Get(self->memo, obj)) {
4306 return memo_get(self, obj);
4307 }
4308
4309 if (type == &PyBytes_Type) {
4310 return save_bytes(self, obj);
4311 }
4312 else if (type == &PyUnicode_Type) {
4313 return save_unicode(self, obj);
4314 }
4315
4316 /* We're only calling Py_EnterRecursiveCall here so that atomic
4317 types above are pickled faster. */
4318 if (Py_EnterRecursiveCall(" while pickling an object")) {
4319 return -1;
4320 }
4321
4322 if (type == &PyDict_Type) {
4323 status = save_dict(self, obj);
4324 goto done;
4325 }
4326 else if (type == &PySet_Type) {
4327 status = save_set(self, obj);
4328 goto done;
4329 }
4330 else if (type == &PyFrozenSet_Type) {
4331 status = save_frozenset(self, obj);
4332 goto done;
4333 }
4334 else if (type == &PyList_Type) {
4335 status = save_list(self, obj);
4336 goto done;
4337 }
4338 else if (type == &PyTuple_Type) {
4339 status = save_tuple(self, obj);
4340 goto done;
4341 }
4342 else if (type == &PyByteArray_Type) {
4343 status = save_bytearray(self, obj);
4344 goto done;
4345 }
4346 else if (type == &PyPickleBuffer_Type) {
4347 status = save_picklebuffer(self, obj);
4348 goto done;
4349 }
4350
4351 /* Now, check reducer_override. If it returns NotImplemented,
4352 * fallback to save_type or save_global, and then perhaps to the
4353 * regular reduction mechanism.
4354 */
4355 if (self->reducer_override != NULL) {
4356 reduce_value = PyObject_CallFunctionObjArgs(self->reducer_override,
4357 obj, NULL);
4358 if (reduce_value == NULL) {
4359 goto error;
4360 }
4361 if (reduce_value != Py_NotImplemented) {
4362 goto reduce;
4363 }
4364 Py_DECREF(reduce_value);
4365 reduce_value = NULL;
4366 }
4367
4368 if (type == &PyType_Type) {
4369 status = save_type(self, obj);
4370 goto done;
4371 }
4372 else if (type == &PyFunction_Type) {
4373 status = save_global(self, obj, NULL);
4374 goto done;
4375 }
4376
4377 /* XXX: This part needs some unit tests. */
4378
4379 /* Get a reduction callable, and call it. This may come from
4380 * self.dispatch_table, copyreg.dispatch_table, the object's
4381 * __reduce_ex__ method, or the object's __reduce__ method.
4382 */
4383 if (self->dispatch_table == NULL) {
4384 PickleState *st = _Pickle_GetGlobalState();
4385 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4386 (PyObject *)type);
4387 if (reduce_func == NULL) {
4388 if (PyErr_Occurred()) {
4389 goto error;
4390 }
4391 } else {
4392 /* PyDict_GetItemWithError() returns a borrowed reference.
4393 Increase the reference count to be consistent with
4394 PyObject_GetItem and _PyObject_GetAttrId used below. */
4395 Py_INCREF(reduce_func);
4396 }
4397 } else {
4398 reduce_func = PyObject_GetItem(self->dispatch_table,
4399 (PyObject *)type);
4400 if (reduce_func == NULL) {
4401 if (PyErr_ExceptionMatches(PyExc_KeyError))
4402 PyErr_Clear();
4403 else
4404 goto error;
4405 }
4406 }
4407 if (reduce_func != NULL) {
4408 Py_INCREF(obj);
4409 reduce_value = _Pickle_FastCall(reduce_func, obj);
4410 }
4411 else if (PyType_IsSubtype(type, &PyType_Type)) {
4412 status = save_global(self, obj, NULL);
4413 goto done;
4414 }
4415 else {
4416 _Py_IDENTIFIER(__reduce__);
4417 _Py_IDENTIFIER(__reduce_ex__);
4418
4419 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4420 automatically defined as __reduce__. While this is convenient, this
4421 make it impossible to know which method was actually called. Of
4422 course, this is not a big deal. But still, it would be nice to let
4423 the user know which method was called when something go
4424 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4425 don't actually have to check for a __reduce__ method. */
4426
4427 /* Check for a __reduce_ex__ method. */
4428 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4429 goto error;
4430 }
4431 if (reduce_func != NULL) {
4432 PyObject *proto;
4433 proto = PyLong_FromLong(self->proto);
4434 if (proto != NULL) {
4435 reduce_value = _Pickle_FastCall(reduce_func, proto);
4436 }
4437 }
4438 else {
4439 /* Check for a __reduce__ method. */
4440 if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4441 goto error;
4442 }
4443 if (reduce_func != NULL) {
4444 reduce_value = _PyObject_CallNoArg(reduce_func);
4445 }
4446 else {
4447 PickleState *st = _Pickle_GetGlobalState();
4448 PyErr_Format(st->PicklingError,
4449 "can't pickle '%.200s' object: %R",
4450 type->tp_name, obj);
4451 goto error;
4452 }
4453 }
4454 }
4455
4456 if (reduce_value == NULL)
4457 goto error;
4458
4459 reduce:
4460 if (PyUnicode_Check(reduce_value)) {
4461 status = save_global(self, obj, reduce_value);
4462 goto done;
4463 }
4464
4465 if (!PyTuple_Check(reduce_value)) {
4466 PickleState *st = _Pickle_GetGlobalState();
4467 PyErr_SetString(st->PicklingError,
4468 "__reduce__ must return a string or tuple");
4469 goto error;
4470 }
4471
4472 status = save_reduce(self, reduce_value, obj);
4473
4474 if (0) {
4475 error:
4476 status = -1;
4477 }
4478 done:
4479
4480 Py_LeaveRecursiveCall();
4481 Py_XDECREF(reduce_func);
4482 Py_XDECREF(reduce_value);
4483
4484 return status;
4485 }
4486
4487 static int
dump(PicklerObject * self,PyObject * obj)4488 dump(PicklerObject *self, PyObject *obj)
4489 {
4490 const char stop_op = STOP;
4491 int status = -1;
4492 PyObject *tmp;
4493 _Py_IDENTIFIER(reducer_override);
4494
4495 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4496 &tmp) < 0) {
4497 goto error;
4498 }
4499 /* Cache the reducer_override method, if it exists. */
4500 if (tmp != NULL) {
4501 Py_XSETREF(self->reducer_override, tmp);
4502 }
4503 else {
4504 Py_CLEAR(self->reducer_override);
4505 }
4506
4507 if (self->proto >= 2) {
4508 char header[2];
4509
4510 header[0] = PROTO;
4511 assert(self->proto >= 0 && self->proto < 256);
4512 header[1] = (unsigned char)self->proto;
4513 if (_Pickler_Write(self, header, 2) < 0)
4514 goto error;
4515 if (self->proto >= 4)
4516 self->framing = 1;
4517 }
4518
4519 if (save(self, obj, 0) < 0 ||
4520 _Pickler_Write(self, &stop_op, 1) < 0 ||
4521 _Pickler_CommitFrame(self) < 0)
4522 goto error;
4523
4524 // Success
4525 status = 0;
4526
4527 error:
4528 self->framing = 0;
4529
4530 /* Break the reference cycle we generated at the beginning this function
4531 * call when setting the reducer_override attribute of the Pickler instance
4532 * to a bound method of the same instance. This is important as the Pickler
4533 * instance holds a reference to each object it has pickled (through its
4534 * memo): thus, these objects wont be garbage-collected as long as the
4535 * Pickler itself is not collected. */
4536 Py_CLEAR(self->reducer_override);
4537 return status;
4538 }
4539
4540 /*[clinic input]
4541
4542 _pickle.Pickler.clear_memo
4543
4544 Clears the pickler's "memo".
4545
4546 The memo is the data structure that remembers which objects the
4547 pickler has already seen, so that shared or recursive objects are
4548 pickled by reference and not by value. This method is useful when
4549 re-using picklers.
4550 [clinic start generated code]*/
4551
4552 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4553 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4554 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4555 {
4556 if (self->memo)
4557 PyMemoTable_Clear(self->memo);
4558
4559 Py_RETURN_NONE;
4560 }
4561
4562 /*[clinic input]
4563
4564 _pickle.Pickler.dump
4565
4566 obj: object
4567 /
4568
4569 Write a pickled representation of the given object to the open file.
4570 [clinic start generated code]*/
4571
4572 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4573 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4574 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4575 {
4576 /* Check whether the Pickler was initialized correctly (issue3664).
4577 Developers often forget to call __init__() in their subclasses, which
4578 would trigger a segfault without this check. */
4579 if (self->write == NULL) {
4580 PickleState *st = _Pickle_GetGlobalState();
4581 PyErr_Format(st->PicklingError,
4582 "Pickler.__init__() was not called by %s.__init__()",
4583 Py_TYPE(self)->tp_name);
4584 return NULL;
4585 }
4586
4587 if (_Pickler_ClearBuffer(self) < 0)
4588 return NULL;
4589
4590 if (dump(self, obj) < 0)
4591 return NULL;
4592
4593 if (_Pickler_FlushToFile(self) < 0)
4594 return NULL;
4595
4596 Py_RETURN_NONE;
4597 }
4598
4599 /*[clinic input]
4600
4601 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4602
4603 Returns size in memory, in bytes.
4604 [clinic start generated code]*/
4605
4606 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4607 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4608 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4609 {
4610 Py_ssize_t res, s;
4611
4612 res = _PyObject_SIZE(Py_TYPE(self));
4613 if (self->memo != NULL) {
4614 res += sizeof(PyMemoTable);
4615 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4616 }
4617 if (self->output_buffer != NULL) {
4618 s = _PySys_GetSizeOf(self->output_buffer);
4619 if (s == -1)
4620 return -1;
4621 res += s;
4622 }
4623 return res;
4624 }
4625
/* Method table for the Pickler type: dump(), clear_memo() and __sizeof__().
 * Each entry is a *_METHODDEF macro (defined outside this section,
 * presumably in the Argument Clinic generated header). */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4632
/* Deallocator for Pickler objects: stops GC tracking, drops every object
 * reference held in the struct, deletes the memo table and finally releases
 * the object's memory through the type's tp_free slot. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);
    Py_XDECREF(self->reducer_override);
    Py_XDECREF(self->buffer_callback);

    /* The memo is a PyMemoTable, not a Python object, so it has its own
       destructor. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4650
/* GC traversal hook for Pickler objects: applies Py_VISIT to each of the
 * object-reference fields listed below.  Returns 0 when no visit aborted
 * the traversal. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    Py_VISIT(self->reducer_override);
    Py_VISIT(self->buffer_callback);
    return 0;
}
4662
4663 static int
Pickler_clear(PicklerObject * self)4664 Pickler_clear(PicklerObject *self)
4665 {
4666 Py_CLEAR(self->output_buffer);
4667 Py_CLEAR(self->write);
4668 Py_CLEAR(self->pers_func);
4669 Py_CLEAR(self->dispatch_table);
4670 Py_CLEAR(self->fast_memo);
4671 Py_CLEAR(self->reducer_override);
4672 Py_CLEAR(self->buffer_callback);
4673
4674 if (self->memo != NULL) {
4675 PyMemoTable *memo = self->memo;
4676 self->memo = NULL;
4677 PyMemoTable_Del(memo);
4678 }
4679 return 0;
4680 }
4681
4682
4683 /*[clinic input]
4684
4685 _pickle.Pickler.__init__
4686
4687 file: object
4688 protocol: object = None
4689 fix_imports: bool = True
4690 buffer_callback: object = None
4691
4692 This takes a binary file for writing a pickle data stream.
4693
4694 The optional *protocol* argument tells the pickler to use the given
4695 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4696 protocol is 4. It was introduced in Python 3.4, and is incompatible
4697 with previous versions.
4698
4699 Specifying a negative protocol version selects the highest protocol
4700 version supported. The higher the protocol used, the more recent the
4701 version of Python needed to read the pickle produced.
4702
4703 The *file* argument must have a write() method that accepts a single
4704 bytes argument. It can thus be a file object opened for binary
4705 writing, an io.BytesIO instance, or any other custom object that meets
4706 this interface.
4707
4708 If *fix_imports* is True and protocol is less than 3, pickle will try
4709 to map the new Python 3 names to the old module names used in Python
4710 2, so that the pickle data stream is readable with Python 2.
4711
4712 If *buffer_callback* is None (the default), buffer views are
4713 serialized into *file* as part of the pickle stream.
4714
4715 If *buffer_callback* is not None, then it can be called any number
4716 of times with a buffer view. If the callback returns a false value
4717 (such as None), the given buffer is out-of-band; otherwise the
4718 buffer is serialized in-band, i.e. inside the pickle stream.
4719
4720 It is an error if *buffer_callback* is not None and *protocol*
4721 is None or smaller than 5.
4722
4723 [clinic start generated code]*/
4724
4725 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4726 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4727 PyObject *protocol, int fix_imports,
4728 PyObject *buffer_callback)
4729 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4730 {
4731 _Py_IDENTIFIER(persistent_id);
4732 _Py_IDENTIFIER(dispatch_table);
4733
4734 /* In case of multiple __init__() calls, clear previous content. */
4735 if (self->write != NULL)
4736 (void)Pickler_clear(self);
4737
4738 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4739 return -1;
4740
4741 if (_Pickler_SetOutputStream(self, file) < 0)
4742 return -1;
4743
4744 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4745 return -1;
4746
4747 /* memo and output_buffer may have already been created in _Pickler_New */
4748 if (self->memo == NULL) {
4749 self->memo = PyMemoTable_New();
4750 if (self->memo == NULL)
4751 return -1;
4752 }
4753 self->output_len = 0;
4754 if (self->output_buffer == NULL) {
4755 self->max_output_len = WRITE_BUF_SIZE;
4756 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4757 self->max_output_len);
4758 if (self->output_buffer == NULL)
4759 return -1;
4760 }
4761
4762 self->fast = 0;
4763 self->fast_nesting = 0;
4764 self->fast_memo = NULL;
4765
4766 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4767 &self->pers_func, &self->pers_func_self) < 0)
4768 {
4769 return -1;
4770 }
4771
4772 if (_PyObject_LookupAttrId((PyObject *)self,
4773 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4774 return -1;
4775 }
4776
4777 return 0;
4778 }
4779
4780
4781 /* Define a proxy object for the Pickler's internal memo object. This is to
4782 * avoid breaking code like:
4783 * pickler.memo.clear()
4784 * and
4785 * pickler.memo = saved_memo
4786 * Is this a good idea? Not really, but we don't want to break code that uses
4787 * it. Note that we don't implement the entire mapping API here. This is
4788 * intentional, as these should be treated as black-box implementation details.
4789 */
4790
4791 /*[clinic input]
4792 _pickle.PicklerMemoProxy.clear
4793
4794 Remove all items from memo.
4795 [clinic start generated code]*/
4796
4797 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4798 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4799 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4800 {
4801 if (self->pickler->memo)
4802 PyMemoTable_Clear(self->pickler->memo);
4803 Py_RETURN_NONE;
4804 }
4805
4806 /*[clinic input]
4807 _pickle.PicklerMemoProxy.copy
4808
4809 Copy the memo to a new object.
4810 [clinic start generated code]*/
4811
4812 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4813 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4814 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4815 {
4816 PyMemoTable *memo;
4817 PyObject *new_memo = PyDict_New();
4818 if (new_memo == NULL)
4819 return NULL;
4820
4821 memo = self->pickler->memo;
4822 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4823 PyMemoEntry entry = memo->mt_table[i];
4824 if (entry.me_key != NULL) {
4825 int status;
4826 PyObject *key, *value;
4827
4828 key = PyLong_FromVoidPtr(entry.me_key);
4829 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4830
4831 if (key == NULL || value == NULL) {
4832 Py_XDECREF(key);
4833 Py_XDECREF(value);
4834 goto error;
4835 }
4836 status = PyDict_SetItem(new_memo, key, value);
4837 Py_DECREF(key);
4838 Py_DECREF(value);
4839 if (status < 0)
4840 goto error;
4841 }
4842 }
4843 return new_memo;
4844
4845 error:
4846 Py_XDECREF(new_memo);
4847 return NULL;
4848 }
4849
4850 /*[clinic input]
4851 _pickle.PicklerMemoProxy.__reduce__
4852
4853 Implement pickle support.
4854 [clinic start generated code]*/
4855
4856 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4857 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4858 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4859 {
4860 PyObject *reduce_value, *dict_args;
4861 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4862 if (contents == NULL)
4863 return NULL;
4864
4865 reduce_value = PyTuple_New(2);
4866 if (reduce_value == NULL) {
4867 Py_DECREF(contents);
4868 return NULL;
4869 }
4870 dict_args = PyTuple_New(1);
4871 if (dict_args == NULL) {
4872 Py_DECREF(contents);
4873 Py_DECREF(reduce_value);
4874 return NULL;
4875 }
4876 PyTuple_SET_ITEM(dict_args, 0, contents);
4877 Py_INCREF((PyObject *)&PyDict_Type);
4878 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4879 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4880 return reduce_value;
4881 }
4882
/* Method table for PicklerMemoProxy: clear(), copy() and __reduce__().
 * Each entry is a *_METHODDEF macro (defined outside this section,
 * presumably in the Argument Clinic generated header). */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4889
/* Deallocator for PicklerMemoProxy: stops GC tracking, drops the reference
 * to the proxied pickler and frees the proxy with PyObject_GC_Del. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4897
/* GC traversal hook for PicklerMemoProxy: the proxied pickler is the only
 * object reference visited.  Returns 0 when the visit did not abort. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4905
/* tp_clear hook for PicklerMemoProxy: drops the reference to the proxied
 * pickler.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4912
4913 static PyTypeObject PicklerMemoProxyType = {
4914 PyVarObject_HEAD_INIT(NULL, 0)
4915 "_pickle.PicklerMemoProxy", /*tp_name*/
4916 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4917 0,
4918 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4919 0, /* tp_vectorcall_offset */
4920 0, /* tp_getattr */
4921 0, /* tp_setattr */
4922 0, /* tp_as_async */
4923 0, /* tp_repr */
4924 0, /* tp_as_number */
4925 0, /* tp_as_sequence */
4926 0, /* tp_as_mapping */
4927 PyObject_HashNotImplemented, /* tp_hash */
4928 0, /* tp_call */
4929 0, /* tp_str */
4930 PyObject_GenericGetAttr, /* tp_getattro */
4931 PyObject_GenericSetAttr, /* tp_setattro */
4932 0, /* tp_as_buffer */
4933 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4934 0, /* tp_doc */
4935 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4936 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4937 0, /* tp_richcompare */
4938 0, /* tp_weaklistoffset */
4939 0, /* tp_iter */
4940 0, /* tp_iternext */
4941 picklerproxy_methods, /* tp_methods */
4942 };
4943
4944 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4945 PicklerMemoProxy_New(PicklerObject *pickler)
4946 {
4947 PicklerMemoProxyObject *self;
4948
4949 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4950 if (self == NULL)
4951 return NULL;
4952 Py_INCREF(pickler);
4953 self->pickler = pickler;
4954 PyObject_GC_Track(self);
4955 return (PyObject *)self;
4956 }
4957
4958 /*****************************************************************************/
4959
/* Getter for Pickler.memo: returns a fresh PicklerMemoProxy bound to this
 * pickler (or NULL if the proxy could not be created).  The closure
 * argument is unused. */
static PyObject *
Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
{
    return PicklerMemoProxy_New(self);
}
4965
4966 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4967 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4968 {
4969 PyMemoTable *new_memo = NULL;
4970
4971 if (obj == NULL) {
4972 PyErr_SetString(PyExc_TypeError,
4973 "attribute deletion is not supported");
4974 return -1;
4975 }
4976
4977 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4978 PicklerObject *pickler =
4979 ((PicklerMemoProxyObject *)obj)->pickler;
4980
4981 new_memo = PyMemoTable_Copy(pickler->memo);
4982 if (new_memo == NULL)
4983 return -1;
4984 }
4985 else if (PyDict_Check(obj)) {
4986 Py_ssize_t i = 0;
4987 PyObject *key, *value;
4988
4989 new_memo = PyMemoTable_New();
4990 if (new_memo == NULL)
4991 return -1;
4992
4993 while (PyDict_Next(obj, &i, &key, &value)) {
4994 Py_ssize_t memo_id;
4995 PyObject *memo_obj;
4996
4997 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
4998 PyErr_SetString(PyExc_TypeError,
4999 "'memo' values must be 2-item tuples");
5000 goto error;
5001 }
5002 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5003 if (memo_id == -1 && PyErr_Occurred())
5004 goto error;
5005 memo_obj = PyTuple_GET_ITEM(value, 1);
5006 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5007 goto error;
5008 }
5009 }
5010 else {
5011 PyErr_Format(PyExc_TypeError,
5012 "'memo' attribute must be a PicklerMemoProxy object "
5013 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5014 return -1;
5015 }
5016
5017 PyMemoTable_Del(self->memo);
5018 self->memo = new_memo;
5019
5020 return 0;
5021
5022 error:
5023 if (new_memo)
5024 PyMemoTable_Del(new_memo);
5025 return -1;
5026 }
5027
5028 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5029 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5030 {
5031 if (self->pers_func == NULL) {
5032 PyErr_SetString(PyExc_AttributeError, "persistent_id");
5033 return NULL;
5034 }
5035 return reconstruct_method(self->pers_func, self->pers_func_self);
5036 }
5037
5038 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5039 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5040 {
5041 if (value == NULL) {
5042 PyErr_SetString(PyExc_TypeError,
5043 "attribute deletion is not supported");
5044 return -1;
5045 }
5046 if (!PyCallable_Check(value)) {
5047 PyErr_SetString(PyExc_TypeError,
5048 "persistent_id must be a callable taking one argument");
5049 return -1;
5050 }
5051
5052 self->pers_func_self = NULL;
5053 Py_INCREF(value);
5054 Py_XSETREF(self->pers_func, value);
5055
5056 return 0;
5057 }
5058
/* Struct members of PicklerObject exposed directly as Python attributes:
 * the int fields `bin` and `fast`, and the object field `dispatch_table`
 * (T_OBJECT_EX). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}  /* sentinel */
};
5065
/* Computed attributes of Pickler: `memo` (read through a proxy object,
 * written from a proxy or a dict) and `persistent_id`. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}  /* sentinel */
};
5073
5074 static PyTypeObject Pickler_Type = {
5075 PyVarObject_HEAD_INIT(NULL, 0)
5076 "_pickle.Pickler" , /*tp_name*/
5077 sizeof(PicklerObject), /*tp_basicsize*/
5078 0, /*tp_itemsize*/
5079 (destructor)Pickler_dealloc, /*tp_dealloc*/
5080 0, /*tp_vectorcall_offset*/
5081 0, /*tp_getattr*/
5082 0, /*tp_setattr*/
5083 0, /*tp_as_async*/
5084 0, /*tp_repr*/
5085 0, /*tp_as_number*/
5086 0, /*tp_as_sequence*/
5087 0, /*tp_as_mapping*/
5088 0, /*tp_hash*/
5089 0, /*tp_call*/
5090 0, /*tp_str*/
5091 0, /*tp_getattro*/
5092 0, /*tp_setattro*/
5093 0, /*tp_as_buffer*/
5094 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5095 _pickle_Pickler___init____doc__, /*tp_doc*/
5096 (traverseproc)Pickler_traverse, /*tp_traverse*/
5097 (inquiry)Pickler_clear, /*tp_clear*/
5098 0, /*tp_richcompare*/
5099 0, /*tp_weaklistoffset*/
5100 0, /*tp_iter*/
5101 0, /*tp_iternext*/
5102 Pickler_methods, /*tp_methods*/
5103 Pickler_members, /*tp_members*/
5104 Pickler_getsets, /*tp_getset*/
5105 0, /*tp_base*/
5106 0, /*tp_dict*/
5107 0, /*tp_descr_get*/
5108 0, /*tp_descr_set*/
5109 0, /*tp_dictoffset*/
5110 _pickle_Pickler___init__, /*tp_init*/
5111 PyType_GenericAlloc, /*tp_alloc*/
5112 PyType_GenericNew, /*tp_new*/
5113 PyObject_GC_Del, /*tp_free*/
5114 0, /*tp_is_gc*/
5115 };
5116
/* Temporary helper for calling self.find_class().

   Calls the Python-level find_class(module_name, global_name) method on the
   unpickler and returns its result (NULL with an exception set on failure).

   XXX: It would be nice to be able to avoid Python function call overhead
   by directly using the C version of find_class() when find_class() is not
   overridden by a subclass.  However, this could become rather hackish.  A
   simpler optimization would be to call the C function when self is not a
   subclass instance. */
static PyObject *
find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
{
    _Py_IDENTIFIER(find_class);

    return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
                                         module_name, global_name, NULL);
}
5132
5133 static Py_ssize_t
marker(UnpicklerObject * self)5134 marker(UnpicklerObject *self)
5135 {
5136 Py_ssize_t mark;
5137
5138 if (self->num_marks < 1) {
5139 PickleState *st = _Pickle_GetGlobalState();
5140 PyErr_SetString(st->UnpicklingError, "could not find MARK");
5141 return -1;
5142 }
5143
5144 mark = self->marks[--self->num_marks];
5145 self->stack->mark_set = self->num_marks != 0;
5146 self->stack->fence = self->num_marks ?
5147 self->marks[self->num_marks - 1] : 0;
5148 return mark;
5149 }
5150
/* Append None to the unpickler's stack.  Returns 0 on success.
 * NOTE(review): -1 is the error value handed to PDATA_APPEND; the macro is
 * defined outside this section, presumably it returns that value from this
 * function when the append fails. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5157
5158 static int
load_int(UnpicklerObject * self)5159 load_int(UnpicklerObject *self)
5160 {
5161 PyObject *value;
5162 char *endptr, *s;
5163 Py_ssize_t len;
5164 long x;
5165
5166 if ((len = _Unpickler_Readline(self, &s)) < 0)
5167 return -1;
5168 if (len < 2)
5169 return bad_readline();
5170
5171 errno = 0;
5172 /* XXX: Should the base argument of strtol() be explicitly set to 10?
5173 XXX(avassalotti): Should this uses PyOS_strtol()? */
5174 x = strtol(s, &endptr, 0);
5175
5176 if (errno || (*endptr != '\n' && *endptr != '\0')) {
5177 /* Hm, maybe we've got something long. Let's try reading
5178 * it as a Python int object. */
5179 errno = 0;
5180 /* XXX: Same thing about the base here. */
5181 value = PyLong_FromString(s, NULL, 0);
5182 if (value == NULL) {
5183 PyErr_SetString(PyExc_ValueError,
5184 "could not convert string to int");
5185 return -1;
5186 }
5187 }
5188 else {
5189 if (len == 3 && (x == 0 || x == 1)) {
5190 if ((value = PyBool_FromLong(x)) == NULL)
5191 return -1;
5192 }
5193 else {
5194 if ((value = PyLong_FromLong(x)) == NULL)
5195 return -1;
5196 }
5197 }
5198
5199 PDATA_PUSH(self->stack, value, -1);
5200 return 0;
5201 }
5202
/* Append the given bool singleton to the unpickler's stack.  `boolean` must
 * be Py_True or Py_False (asserted).  Returns 0 on success.
 * NOTE(review): -1 is the error value handed to PDATA_APPEND; the macro is
 * defined outside this section. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5210
5211 /* s contains x bytes of an unsigned little-endian integer. Return its value
5212 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5213 */
5214 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5215 calc_binsize(char *bytes, int nbytes)
5216 {
5217 unsigned char *s = (unsigned char *)bytes;
5218 int i;
5219 size_t x = 0;
5220
5221 if (nbytes > (int)sizeof(size_t)) {
5222 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5223 * have 64-bit size that can't be represented on 32-bit platform.
5224 */
5225 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5226 if (s[i])
5227 return -1;
5228 }
5229 nbytes = (int)sizeof(size_t);
5230 }
5231 for (i = 0; i < nbytes; i++) {
5232 x |= (size_t) s[i] << (8 * i);
5233 }
5234
5235 if (x > PY_SSIZE_T_MAX)
5236 return -1;
5237 else
5238 return (Py_ssize_t) x;
5239 }
5240
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The callers in this file pass 1, 2 or 4 for nbytes.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long bits = 0;
    int i;

    /* Accumulate in an unsigned type: shifting a set bit into the sign
     * position of a signed long is undefined behaviour, which a 4-byte
     * value with its top bit set would trigger wherever long is 32 bits.
     */
    for (i = 0; i < nbytes; i++) {
        bits |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: bit 31 is the sign bit whatever the width of long.
     * Compute (low 31 bits) - 2**31 without overflowing a 32-bit long.
     */
    if (nbytes == 4 && (bits & 0x80000000UL) != 0) {
        return (long)(bits & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)bits;
}
5267
5268 static int
load_binintx(UnpicklerObject * self,char * s,int size)5269 load_binintx(UnpicklerObject *self, char *s, int size)
5270 {
5271 PyObject *value;
5272 long x;
5273
5274 x = calc_binint(s, size);
5275
5276 if ((value = PyLong_FromLong(x)) == NULL)
5277 return -1;
5278
5279 PDATA_PUSH(self->stack, value, -1);
5280 return 0;
5281 }
5282
5283 static int
load_binint(UnpicklerObject * self)5284 load_binint(UnpicklerObject *self)
5285 {
5286 char *s;
5287
5288 if (_Unpickler_Read(self, &s, 4) < 0)
5289 return -1;
5290
5291 return load_binintx(self, s, 4);
5292 }
5293
5294 static int
load_binint1(UnpicklerObject * self)5295 load_binint1(UnpicklerObject *self)
5296 {
5297 char *s;
5298
5299 if (_Unpickler_Read(self, &s, 1) < 0)
5300 return -1;
5301
5302 return load_binintx(self, s, 1);
5303 }
5304
5305 static int
load_binint2(UnpicklerObject * self)5306 load_binint2(UnpicklerObject *self)
5307 {
5308 char *s;
5309
5310 if (_Unpickler_Read(self, &s, 2) < 0)
5311 return -1;
5312
5313 return load_binintx(self, s, 2);
5314 }
5315
5316 static int
load_long(UnpicklerObject * self)5317 load_long(UnpicklerObject *self)
5318 {
5319 PyObject *value;
5320 char *s = NULL;
5321 Py_ssize_t len;
5322
5323 if ((len = _Unpickler_Readline(self, &s)) < 0)
5324 return -1;
5325 if (len < 2)
5326 return bad_readline();
5327
5328 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5329 the 'L' before calling PyLong_FromString. In order to maintain
5330 compatibility with Python 3.0.0, we don't actually *require*
5331 the 'L' to be present. */
5332 if (s[len-2] == 'L')
5333 s[len-2] = '\0';
5334 /* XXX: Should the base argument explicitly set to 10? */
5335 value = PyLong_FromString(s, NULL, 0);
5336 if (value == NULL)
5337 return -1;
5338
5339 PDATA_PUSH(self->stack, value, -1);
5340 return 0;
5341 }
5342
5343 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5344 * data following.
5345 */
5346 static int
load_counted_long(UnpicklerObject * self,int size)5347 load_counted_long(UnpicklerObject *self, int size)
5348 {
5349 PyObject *value;
5350 char *nbytes;
5351 char *pdata;
5352
5353 assert(size == 1 || size == 4);
5354 if (_Unpickler_Read(self, &nbytes, size) < 0)
5355 return -1;
5356
5357 size = calc_binint(nbytes, size);
5358 if (size < 0) {
5359 PickleState *st = _Pickle_GetGlobalState();
5360 /* Corrupt or hostile pickle -- we never write one like this */
5361 PyErr_SetString(st->UnpicklingError,
5362 "LONG pickle has negative byte count");
5363 return -1;
5364 }
5365
5366 if (size == 0)
5367 value = PyLong_FromLong(0L);
5368 else {
5369 /* Read the raw little-endian bytes and convert. */
5370 if (_Unpickler_Read(self, &pdata, size) < 0)
5371 return -1;
5372 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5373 1 /* little endian */ , 1 /* signed */ );
5374 }
5375 if (value == NULL)
5376 return -1;
5377 PDATA_PUSH(self->stack, value, -1);
5378 return 0;
5379 }
5380
5381 static int
load_float(UnpicklerObject * self)5382 load_float(UnpicklerObject *self)
5383 {
5384 PyObject *value;
5385 char *endptr, *s;
5386 Py_ssize_t len;
5387 double d;
5388
5389 if ((len = _Unpickler_Readline(self, &s)) < 0)
5390 return -1;
5391 if (len < 2)
5392 return bad_readline();
5393
5394 errno = 0;
5395 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5396 if (d == -1.0 && PyErr_Occurred())
5397 return -1;
5398 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5399 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5400 return -1;
5401 }
5402 value = PyFloat_FromDouble(d);
5403 if (value == NULL)
5404 return -1;
5405
5406 PDATA_PUSH(self->stack, value, -1);
5407 return 0;
5408 }
5409
5410 static int
load_binfloat(UnpicklerObject * self)5411 load_binfloat(UnpicklerObject *self)
5412 {
5413 PyObject *value;
5414 double x;
5415 char *s;
5416
5417 if (_Unpickler_Read(self, &s, 8) < 0)
5418 return -1;
5419
5420 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5421 if (x == -1.0 && PyErr_Occurred())
5422 return -1;
5423
5424 if ((value = PyFloat_FromDouble(x)) == NULL)
5425 return -1;
5426
5427 PDATA_PUSH(self->stack, value, -1);
5428 return 0;
5429 }
5430
5431 static int
load_string(UnpicklerObject * self)5432 load_string(UnpicklerObject *self)
5433 {
5434 PyObject *bytes;
5435 PyObject *obj;
5436 Py_ssize_t len;
5437 char *s, *p;
5438
5439 if ((len = _Unpickler_Readline(self, &s)) < 0)
5440 return -1;
5441 /* Strip the newline */
5442 len--;
5443 /* Strip outermost quotes */
5444 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5445 p = s + 1;
5446 len -= 2;
5447 }
5448 else {
5449 PickleState *st = _Pickle_GetGlobalState();
5450 PyErr_SetString(st->UnpicklingError,
5451 "the STRING opcode argument must be quoted");
5452 return -1;
5453 }
5454 assert(len >= 0);
5455
5456 /* Use the PyBytes API to decode the string, since that is what is used
5457 to encode, and then coerce the result to Unicode. */
5458 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5459 if (bytes == NULL)
5460 return -1;
5461
5462 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5463 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5464 if (strcmp(self->encoding, "bytes") == 0) {
5465 obj = bytes;
5466 }
5467 else {
5468 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5469 Py_DECREF(bytes);
5470 if (obj == NULL) {
5471 return -1;
5472 }
5473 }
5474
5475 PDATA_PUSH(self->stack, obj, -1);
5476 return 0;
5477 }
5478
5479 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5480 load_counted_binstring(UnpicklerObject *self, int nbytes)
5481 {
5482 PyObject *obj;
5483 Py_ssize_t size;
5484 char *s;
5485
5486 if (_Unpickler_Read(self, &s, nbytes) < 0)
5487 return -1;
5488
5489 size = calc_binsize(s, nbytes);
5490 if (size < 0) {
5491 PickleState *st = _Pickle_GetGlobalState();
5492 PyErr_Format(st->UnpicklingError,
5493 "BINSTRING exceeds system's maximum size of %zd bytes",
5494 PY_SSIZE_T_MAX);
5495 return -1;
5496 }
5497
5498 if (_Unpickler_Read(self, &s, size) < 0)
5499 return -1;
5500
5501 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5502 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5503 if (strcmp(self->encoding, "bytes") == 0) {
5504 obj = PyBytes_FromStringAndSize(s, size);
5505 }
5506 else {
5507 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5508 }
5509 if (obj == NULL) {
5510 return -1;
5511 }
5512
5513 PDATA_PUSH(self->stack, obj, -1);
5514 return 0;
5515 }
5516
5517 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5518 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5519 {
5520 PyObject *bytes;
5521 Py_ssize_t size;
5522 char *s;
5523
5524 if (_Unpickler_Read(self, &s, nbytes) < 0)
5525 return -1;
5526
5527 size = calc_binsize(s, nbytes);
5528 if (size < 0) {
5529 PyErr_Format(PyExc_OverflowError,
5530 "BINBYTES exceeds system's maximum size of %zd bytes",
5531 PY_SSIZE_T_MAX);
5532 return -1;
5533 }
5534
5535 bytes = PyBytes_FromStringAndSize(NULL, size);
5536 if (bytes == NULL)
5537 return -1;
5538 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5539 Py_DECREF(bytes);
5540 return -1;
5541 }
5542
5543 PDATA_PUSH(self->stack, bytes, -1);
5544 return 0;
5545 }
5546
5547 static int
load_counted_bytearray(UnpicklerObject * self)5548 load_counted_bytearray(UnpicklerObject *self)
5549 {
5550 PyObject *bytearray;
5551 Py_ssize_t size;
5552 char *s;
5553
5554 if (_Unpickler_Read(self, &s, 8) < 0) {
5555 return -1;
5556 }
5557
5558 size = calc_binsize(s, 8);
5559 if (size < 0) {
5560 PyErr_Format(PyExc_OverflowError,
5561 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5562 PY_SSIZE_T_MAX);
5563 return -1;
5564 }
5565
5566 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5567 if (bytearray == NULL) {
5568 return -1;
5569 }
5570 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5571 Py_DECREF(bytearray);
5572 return -1;
5573 }
5574
5575 PDATA_PUSH(self->stack, bytearray, -1);
5576 return 0;
5577 }
5578
5579 static int
load_next_buffer(UnpicklerObject * self)5580 load_next_buffer(UnpicklerObject *self)
5581 {
5582 if (self->buffers == NULL) {
5583 PickleState *st = _Pickle_GetGlobalState();
5584 PyErr_SetString(st->UnpicklingError,
5585 "pickle stream refers to out-of-band data "
5586 "but no *buffers* argument was given");
5587 return -1;
5588 }
5589 PyObject *buf = PyIter_Next(self->buffers);
5590 if (buf == NULL) {
5591 if (!PyErr_Occurred()) {
5592 PickleState *st = _Pickle_GetGlobalState();
5593 PyErr_SetString(st->UnpicklingError,
5594 "not enough out-of-band buffers");
5595 }
5596 return -1;
5597 }
5598
5599 PDATA_PUSH(self->stack, buf, -1);
5600 return 0;
5601 }
5602
5603 static int
load_readonly_buffer(UnpicklerObject * self)5604 load_readonly_buffer(UnpicklerObject *self)
5605 {
5606 Py_ssize_t len = Py_SIZE(self->stack);
5607 if (len <= self->stack->fence) {
5608 return Pdata_stack_underflow(self->stack);
5609 }
5610
5611 PyObject *obj = self->stack->data[len - 1];
5612 PyObject *view = PyMemoryView_FromObject(obj);
5613 if (view == NULL) {
5614 return -1;
5615 }
5616 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5617 /* Original object is writable */
5618 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5619 self->stack->data[len - 1] = view;
5620 Py_DECREF(obj);
5621 }
5622 else {
5623 /* Original object is read-only, no need to replace it */
5624 Py_DECREF(view);
5625 }
5626 return 0;
5627 }
5628
5629 static int
load_unicode(UnpicklerObject * self)5630 load_unicode(UnpicklerObject *self)
5631 {
5632 PyObject *str;
5633 Py_ssize_t len;
5634 char *s = NULL;
5635
5636 if ((len = _Unpickler_Readline(self, &s)) < 0)
5637 return -1;
5638 if (len < 1)
5639 return bad_readline();
5640
5641 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5642 if (str == NULL)
5643 return -1;
5644
5645 PDATA_PUSH(self->stack, str, -1);
5646 return 0;
5647 }
5648
5649 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5650 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5651 {
5652 PyObject *str;
5653 Py_ssize_t size;
5654 char *s;
5655
5656 if (_Unpickler_Read(self, &s, nbytes) < 0)
5657 return -1;
5658
5659 size = calc_binsize(s, nbytes);
5660 if (size < 0) {
5661 PyErr_Format(PyExc_OverflowError,
5662 "BINUNICODE exceeds system's maximum size of %zd bytes",
5663 PY_SSIZE_T_MAX);
5664 return -1;
5665 }
5666
5667 if (_Unpickler_Read(self, &s, size) < 0)
5668 return -1;
5669
5670 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5671 if (str == NULL)
5672 return -1;
5673
5674 PDATA_PUSH(self->stack, str, -1);
5675 return 0;
5676 }
5677
5678 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5679 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5680 {
5681 PyObject *tuple;
5682
5683 if (Py_SIZE(self->stack) < len)
5684 return Pdata_stack_underflow(self->stack);
5685
5686 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5687 if (tuple == NULL)
5688 return -1;
5689 PDATA_PUSH(self->stack, tuple, -1);
5690 return 0;
5691 }
5692
5693 static int
load_tuple(UnpicklerObject * self)5694 load_tuple(UnpicklerObject *self)
5695 {
5696 Py_ssize_t i;
5697
5698 if ((i = marker(self)) < 0)
5699 return -1;
5700
5701 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5702 }
5703
5704 static int
load_empty_list(UnpicklerObject * self)5705 load_empty_list(UnpicklerObject *self)
5706 {
5707 PyObject *list;
5708
5709 if ((list = PyList_New(0)) == NULL)
5710 return -1;
5711 PDATA_PUSH(self->stack, list, -1);
5712 return 0;
5713 }
5714
5715 static int
load_empty_dict(UnpicklerObject * self)5716 load_empty_dict(UnpicklerObject *self)
5717 {
5718 PyObject *dict;
5719
5720 if ((dict = PyDict_New()) == NULL)
5721 return -1;
5722 PDATA_PUSH(self->stack, dict, -1);
5723 return 0;
5724 }
5725
5726 static int
load_empty_set(UnpicklerObject * self)5727 load_empty_set(UnpicklerObject *self)
5728 {
5729 PyObject *set;
5730
5731 if ((set = PySet_New(NULL)) == NULL)
5732 return -1;
5733 PDATA_PUSH(self->stack, set, -1);
5734 return 0;
5735 }
5736
5737 static int
load_list(UnpicklerObject * self)5738 load_list(UnpicklerObject *self)
5739 {
5740 PyObject *list;
5741 Py_ssize_t i;
5742
5743 if ((i = marker(self)) < 0)
5744 return -1;
5745
5746 list = Pdata_poplist(self->stack, i);
5747 if (list == NULL)
5748 return -1;
5749 PDATA_PUSH(self->stack, list, -1);
5750 return 0;
5751 }
5752
5753 static int
load_dict(UnpicklerObject * self)5754 load_dict(UnpicklerObject *self)
5755 {
5756 PyObject *dict, *key, *value;
5757 Py_ssize_t i, j, k;
5758
5759 if ((i = marker(self)) < 0)
5760 return -1;
5761 j = Py_SIZE(self->stack);
5762
5763 if ((dict = PyDict_New()) == NULL)
5764 return -1;
5765
5766 if ((j - i) % 2 != 0) {
5767 PickleState *st = _Pickle_GetGlobalState();
5768 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5769 Py_DECREF(dict);
5770 return -1;
5771 }
5772
5773 for (k = i + 1; k < j; k += 2) {
5774 key = self->stack->data[k - 1];
5775 value = self->stack->data[k];
5776 if (PyDict_SetItem(dict, key, value) < 0) {
5777 Py_DECREF(dict);
5778 return -1;
5779 }
5780 }
5781 Pdata_clear(self->stack, i);
5782 PDATA_PUSH(self->stack, dict, -1);
5783 return 0;
5784 }
5785
5786 static int
load_frozenset(UnpicklerObject * self)5787 load_frozenset(UnpicklerObject *self)
5788 {
5789 PyObject *items;
5790 PyObject *frozenset;
5791 Py_ssize_t i;
5792
5793 if ((i = marker(self)) < 0)
5794 return -1;
5795
5796 items = Pdata_poptuple(self->stack, i);
5797 if (items == NULL)
5798 return -1;
5799
5800 frozenset = PyFrozenSet_New(items);
5801 Py_DECREF(items);
5802 if (frozenset == NULL)
5803 return -1;
5804
5805 PDATA_PUSH(self->stack, frozenset, -1);
5806 return 0;
5807 }
5808
5809 static PyObject *
instantiate(PyObject * cls,PyObject * args)5810 instantiate(PyObject *cls, PyObject *args)
5811 {
5812 /* Caller must assure args are a tuple. Normally, args come from
5813 Pdata_poptuple which packs objects from the top of the stack
5814 into a newly created tuple. */
5815 assert(PyTuple_Check(args));
5816 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5817 _Py_IDENTIFIER(__getinitargs__);
5818 _Py_IDENTIFIER(__new__);
5819 PyObject *func;
5820 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5821 return NULL;
5822 }
5823 if (func == NULL) {
5824 return _PyObject_CallMethodIdObjArgs(cls, &PyId___new__, cls, NULL);
5825 }
5826 Py_DECREF(func);
5827 }
5828 return PyObject_CallObject(cls, args);
5829 }
5830
5831 static int
load_obj(UnpicklerObject * self)5832 load_obj(UnpicklerObject *self)
5833 {
5834 PyObject *cls, *args, *obj = NULL;
5835 Py_ssize_t i;
5836
5837 if ((i = marker(self)) < 0)
5838 return -1;
5839
5840 if (Py_SIZE(self->stack) - i < 1)
5841 return Pdata_stack_underflow(self->stack);
5842
5843 args = Pdata_poptuple(self->stack, i + 1);
5844 if (args == NULL)
5845 return -1;
5846
5847 PDATA_POP(self->stack, cls);
5848 if (cls) {
5849 obj = instantiate(cls, args);
5850 Py_DECREF(cls);
5851 }
5852 Py_DECREF(args);
5853 if (obj == NULL)
5854 return -1;
5855
5856 PDATA_PUSH(self->stack, obj, -1);
5857 return 0;
5858 }
5859
5860 static int
load_inst(UnpicklerObject * self)5861 load_inst(UnpicklerObject *self)
5862 {
5863 PyObject *cls = NULL;
5864 PyObject *args = NULL;
5865 PyObject *obj = NULL;
5866 PyObject *module_name;
5867 PyObject *class_name;
5868 Py_ssize_t len;
5869 Py_ssize_t i;
5870 char *s;
5871
5872 if ((i = marker(self)) < 0)
5873 return -1;
5874 if ((len = _Unpickler_Readline(self, &s)) < 0)
5875 return -1;
5876 if (len < 2)
5877 return bad_readline();
5878
5879 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5880 identifiers are permitted in Python 3.0, since the INST opcode is only
5881 supported by older protocols on Python 2.x. */
5882 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5883 if (module_name == NULL)
5884 return -1;
5885
5886 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5887 if (len < 2) {
5888 Py_DECREF(module_name);
5889 return bad_readline();
5890 }
5891 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5892 if (class_name != NULL) {
5893 cls = find_class(self, module_name, class_name);
5894 Py_DECREF(class_name);
5895 }
5896 }
5897 Py_DECREF(module_name);
5898
5899 if (cls == NULL)
5900 return -1;
5901
5902 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5903 obj = instantiate(cls, args);
5904 Py_DECREF(args);
5905 }
5906 Py_DECREF(cls);
5907
5908 if (obj == NULL)
5909 return -1;
5910
5911 PDATA_PUSH(self->stack, obj, -1);
5912 return 0;
5913 }
5914
5915 static int
load_newobj(UnpicklerObject * self)5916 load_newobj(UnpicklerObject *self)
5917 {
5918 PyObject *args = NULL;
5919 PyObject *clsraw = NULL;
5920 PyTypeObject *cls; /* clsraw cast to its true type */
5921 PyObject *obj;
5922 PickleState *st = _Pickle_GetGlobalState();
5923
5924 /* Stack is ... cls argtuple, and we want to call
5925 * cls.__new__(cls, *argtuple).
5926 */
5927 PDATA_POP(self->stack, args);
5928 if (args == NULL)
5929 goto error;
5930 if (!PyTuple_Check(args)) {
5931 PyErr_SetString(st->UnpicklingError,
5932 "NEWOBJ expected an arg " "tuple.");
5933 goto error;
5934 }
5935
5936 PDATA_POP(self->stack, clsraw);
5937 cls = (PyTypeObject *)clsraw;
5938 if (cls == NULL)
5939 goto error;
5940 if (!PyType_Check(cls)) {
5941 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5942 "isn't a type object");
5943 goto error;
5944 }
5945 if (cls->tp_new == NULL) {
5946 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
5947 "has NULL tp_new");
5948 goto error;
5949 }
5950
5951 /* Call __new__. */
5952 obj = cls->tp_new(cls, args, NULL);
5953 if (obj == NULL)
5954 goto error;
5955
5956 Py_DECREF(args);
5957 Py_DECREF(clsraw);
5958 PDATA_PUSH(self->stack, obj, -1);
5959 return 0;
5960
5961 error:
5962 Py_XDECREF(args);
5963 Py_XDECREF(clsraw);
5964 return -1;
5965 }
5966
5967 static int
load_newobj_ex(UnpicklerObject * self)5968 load_newobj_ex(UnpicklerObject *self)
5969 {
5970 PyObject *cls, *args, *kwargs;
5971 PyObject *obj;
5972 PickleState *st = _Pickle_GetGlobalState();
5973
5974 PDATA_POP(self->stack, kwargs);
5975 if (kwargs == NULL) {
5976 return -1;
5977 }
5978 PDATA_POP(self->stack, args);
5979 if (args == NULL) {
5980 Py_DECREF(kwargs);
5981 return -1;
5982 }
5983 PDATA_POP(self->stack, cls);
5984 if (cls == NULL) {
5985 Py_DECREF(kwargs);
5986 Py_DECREF(args);
5987 return -1;
5988 }
5989
5990 if (!PyType_Check(cls)) {
5991 PyErr_Format(st->UnpicklingError,
5992 "NEWOBJ_EX class argument must be a type, not %.200s",
5993 Py_TYPE(cls)->tp_name);
5994 goto error;
5995 }
5996
5997 if (((PyTypeObject *)cls)->tp_new == NULL) {
5998 PyErr_SetString(st->UnpicklingError,
5999 "NEWOBJ_EX class argument doesn't have __new__");
6000 goto error;
6001 }
6002 if (!PyTuple_Check(args)) {
6003 PyErr_Format(st->UnpicklingError,
6004 "NEWOBJ_EX args argument must be a tuple, not %.200s",
6005 Py_TYPE(args)->tp_name);
6006 goto error;
6007 }
6008 if (!PyDict_Check(kwargs)) {
6009 PyErr_Format(st->UnpicklingError,
6010 "NEWOBJ_EX kwargs argument must be a dict, not %.200s",
6011 Py_TYPE(kwargs)->tp_name);
6012 goto error;
6013 }
6014
6015 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
6016 Py_DECREF(kwargs);
6017 Py_DECREF(args);
6018 Py_DECREF(cls);
6019 if (obj == NULL) {
6020 return -1;
6021 }
6022 PDATA_PUSH(self->stack, obj, -1);
6023 return 0;
6024
6025 error:
6026 Py_DECREF(kwargs);
6027 Py_DECREF(args);
6028 Py_DECREF(cls);
6029 return -1;
6030 }
6031
6032 static int
load_global(UnpicklerObject * self)6033 load_global(UnpicklerObject *self)
6034 {
6035 PyObject *global = NULL;
6036 PyObject *module_name;
6037 PyObject *global_name;
6038 Py_ssize_t len;
6039 char *s;
6040
6041 if ((len = _Unpickler_Readline(self, &s)) < 0)
6042 return -1;
6043 if (len < 2)
6044 return bad_readline();
6045 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6046 if (!module_name)
6047 return -1;
6048
6049 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6050 if (len < 2) {
6051 Py_DECREF(module_name);
6052 return bad_readline();
6053 }
6054 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6055 if (global_name) {
6056 global = find_class(self, module_name, global_name);
6057 Py_DECREF(global_name);
6058 }
6059 }
6060 Py_DECREF(module_name);
6061
6062 if (global == NULL)
6063 return -1;
6064 PDATA_PUSH(self->stack, global, -1);
6065 return 0;
6066 }
6067
6068 static int
load_stack_global(UnpicklerObject * self)6069 load_stack_global(UnpicklerObject *self)
6070 {
6071 PyObject *global;
6072 PyObject *module_name;
6073 PyObject *global_name;
6074
6075 PDATA_POP(self->stack, global_name);
6076 PDATA_POP(self->stack, module_name);
6077 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6078 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6079 PickleState *st = _Pickle_GetGlobalState();
6080 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6081 Py_XDECREF(global_name);
6082 Py_XDECREF(module_name);
6083 return -1;
6084 }
6085 global = find_class(self, module_name, global_name);
6086 Py_DECREF(global_name);
6087 Py_DECREF(module_name);
6088 if (global == NULL)
6089 return -1;
6090 PDATA_PUSH(self->stack, global, -1);
6091 return 0;
6092 }
6093
6094 static int
load_persid(UnpicklerObject * self)6095 load_persid(UnpicklerObject *self)
6096 {
6097 PyObject *pid, *obj;
6098 Py_ssize_t len;
6099 char *s;
6100
6101 if (self->pers_func) {
6102 if ((len = _Unpickler_Readline(self, &s)) < 0)
6103 return -1;
6104 if (len < 1)
6105 return bad_readline();
6106
6107 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6108 if (pid == NULL) {
6109 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6110 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6111 "persistent IDs in protocol 0 must be "
6112 "ASCII strings");
6113 }
6114 return -1;
6115 }
6116
6117 obj = call_method(self->pers_func, self->pers_func_self, pid);
6118 Py_DECREF(pid);
6119 if (obj == NULL)
6120 return -1;
6121
6122 PDATA_PUSH(self->stack, obj, -1);
6123 return 0;
6124 }
6125 else {
6126 PickleState *st = _Pickle_GetGlobalState();
6127 PyErr_SetString(st->UnpicklingError,
6128 "A load persistent id instruction was encountered,\n"
6129 "but no persistent_load function was specified.");
6130 return -1;
6131 }
6132 }
6133
6134 static int
load_binpersid(UnpicklerObject * self)6135 load_binpersid(UnpicklerObject *self)
6136 {
6137 PyObject *pid, *obj;
6138
6139 if (self->pers_func) {
6140 PDATA_POP(self->stack, pid);
6141 if (pid == NULL)
6142 return -1;
6143
6144 obj = call_method(self->pers_func, self->pers_func_self, pid);
6145 Py_DECREF(pid);
6146 if (obj == NULL)
6147 return -1;
6148
6149 PDATA_PUSH(self->stack, obj, -1);
6150 return 0;
6151 }
6152 else {
6153 PickleState *st = _Pickle_GetGlobalState();
6154 PyErr_SetString(st->UnpicklingError,
6155 "A load persistent id instruction was encountered,\n"
6156 "but no persistent_load function was specified.");
6157 return -1;
6158 }
6159 }
6160
6161 static int
load_pop(UnpicklerObject * self)6162 load_pop(UnpicklerObject *self)
6163 {
6164 Py_ssize_t len = Py_SIZE(self->stack);
6165
6166 /* Note that we split the (pickle.py) stack into two stacks,
6167 * an object stack and a mark stack. We have to be clever and
6168 * pop the right one. We do this by looking at the top of the
6169 * mark stack first, and only signalling a stack underflow if
6170 * the object stack is empty and the mark stack doesn't match
6171 * our expectations.
6172 */
6173 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6174 self->num_marks--;
6175 self->stack->mark_set = self->num_marks != 0;
6176 self->stack->fence = self->num_marks ?
6177 self->marks[self->num_marks - 1] : 0;
6178 } else if (len <= self->stack->fence)
6179 return Pdata_stack_underflow(self->stack);
6180 else {
6181 len--;
6182 Py_DECREF(self->stack->data[len]);
6183 Py_SIZE(self->stack) = len;
6184 }
6185 return 0;
6186 }
6187
6188 static int
load_pop_mark(UnpicklerObject * self)6189 load_pop_mark(UnpicklerObject *self)
6190 {
6191 Py_ssize_t i;
6192
6193 if ((i = marker(self)) < 0)
6194 return -1;
6195
6196 Pdata_clear(self->stack, i);
6197
6198 return 0;
6199 }
6200
6201 static int
load_dup(UnpicklerObject * self)6202 load_dup(UnpicklerObject *self)
6203 {
6204 PyObject *last;
6205 Py_ssize_t len = Py_SIZE(self->stack);
6206
6207 if (len <= self->stack->fence)
6208 return Pdata_stack_underflow(self->stack);
6209 last = self->stack->data[len - 1];
6210 PDATA_APPEND(self->stack, last, -1);
6211 return 0;
6212 }
6213
6214 static int
load_get(UnpicklerObject * self)6215 load_get(UnpicklerObject *self)
6216 {
6217 PyObject *key, *value;
6218 Py_ssize_t idx;
6219 Py_ssize_t len;
6220 char *s;
6221
6222 if ((len = _Unpickler_Readline(self, &s)) < 0)
6223 return -1;
6224 if (len < 2)
6225 return bad_readline();
6226
6227 key = PyLong_FromString(s, NULL, 10);
6228 if (key == NULL)
6229 return -1;
6230 idx = PyLong_AsSsize_t(key);
6231 if (idx == -1 && PyErr_Occurred()) {
6232 Py_DECREF(key);
6233 return -1;
6234 }
6235
6236 value = _Unpickler_MemoGet(self, idx);
6237 if (value == NULL) {
6238 if (!PyErr_Occurred())
6239 PyErr_SetObject(PyExc_KeyError, key);
6240 Py_DECREF(key);
6241 return -1;
6242 }
6243 Py_DECREF(key);
6244
6245 PDATA_APPEND(self->stack, value, -1);
6246 return 0;
6247 }
6248
6249 static int
load_binget(UnpicklerObject * self)6250 load_binget(UnpicklerObject *self)
6251 {
6252 PyObject *value;
6253 Py_ssize_t idx;
6254 char *s;
6255
6256 if (_Unpickler_Read(self, &s, 1) < 0)
6257 return -1;
6258
6259 idx = Py_CHARMASK(s[0]);
6260
6261 value = _Unpickler_MemoGet(self, idx);
6262 if (value == NULL) {
6263 PyObject *key = PyLong_FromSsize_t(idx);
6264 if (key != NULL) {
6265 PyErr_SetObject(PyExc_KeyError, key);
6266 Py_DECREF(key);
6267 }
6268 return -1;
6269 }
6270
6271 PDATA_APPEND(self->stack, value, -1);
6272 return 0;
6273 }
6274
6275 static int
load_long_binget(UnpicklerObject * self)6276 load_long_binget(UnpicklerObject *self)
6277 {
6278 PyObject *value;
6279 Py_ssize_t idx;
6280 char *s;
6281
6282 if (_Unpickler_Read(self, &s, 4) < 0)
6283 return -1;
6284
6285 idx = calc_binsize(s, 4);
6286
6287 value = _Unpickler_MemoGet(self, idx);
6288 if (value == NULL) {
6289 PyObject *key = PyLong_FromSsize_t(idx);
6290 if (key != NULL) {
6291 PyErr_SetObject(PyExc_KeyError, key);
6292 Py_DECREF(key);
6293 }
6294 return -1;
6295 }
6296
6297 PDATA_APPEND(self->stack, value, -1);
6298 return 0;
6299 }
6300
6301 /* Push an object from the extension registry (EXT[124]). nbytes is
6302 * the number of bytes following the opcode, holding the index (code) value.
6303 */
6304 static int
load_extension(UnpicklerObject * self,int nbytes)6305 load_extension(UnpicklerObject *self, int nbytes)
6306 {
6307 char *codebytes; /* the nbytes bytes after the opcode */
6308 long code; /* calc_binint returns long */
6309 PyObject *py_code; /* code as a Python int */
6310 PyObject *obj; /* the object to push */
6311 PyObject *pair; /* (module_name, class_name) */
6312 PyObject *module_name, *class_name;
6313 PickleState *st = _Pickle_GetGlobalState();
6314
6315 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6316 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6317 return -1;
6318 code = calc_binint(codebytes, nbytes);
6319 if (code <= 0) { /* note that 0 is forbidden */
6320 /* Corrupt or hostile pickle. */
6321 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6322 return -1;
6323 }
6324
6325 /* Look for the code in the cache. */
6326 py_code = PyLong_FromLong(code);
6327 if (py_code == NULL)
6328 return -1;
6329 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6330 if (obj != NULL) {
6331 /* Bingo. */
6332 Py_DECREF(py_code);
6333 PDATA_APPEND(self->stack, obj, -1);
6334 return 0;
6335 }
6336 if (PyErr_Occurred()) {
6337 Py_DECREF(py_code);
6338 return -1;
6339 }
6340
6341 /* Look up the (module_name, class_name) pair. */
6342 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6343 if (pair == NULL) {
6344 Py_DECREF(py_code);
6345 if (!PyErr_Occurred()) {
6346 PyErr_Format(PyExc_ValueError, "unregistered extension "
6347 "code %ld", code);
6348 }
6349 return -1;
6350 }
6351 /* Since the extension registry is manipulable via Python code,
6352 * confirm that pair is really a 2-tuple of strings.
6353 */
6354 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6355 goto error;
6356 }
6357
6358 module_name = PyTuple_GET_ITEM(pair, 0);
6359 if (!PyUnicode_Check(module_name)) {
6360 goto error;
6361 }
6362
6363 class_name = PyTuple_GET_ITEM(pair, 1);
6364 if (!PyUnicode_Check(class_name)) {
6365 goto error;
6366 }
6367
6368 /* Load the object. */
6369 obj = find_class(self, module_name, class_name);
6370 if (obj == NULL) {
6371 Py_DECREF(py_code);
6372 return -1;
6373 }
6374 /* Cache code -> obj. */
6375 code = PyDict_SetItem(st->extension_cache, py_code, obj);
6376 Py_DECREF(py_code);
6377 if (code < 0) {
6378 Py_DECREF(obj);
6379 return -1;
6380 }
6381 PDATA_PUSH(self->stack, obj, -1);
6382 return 0;
6383
6384 error:
6385 Py_DECREF(py_code);
6386 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6387 "isn't a 2-tuple of strings", code);
6388 return -1;
6389 }
6390
6391 static int
load_put(UnpicklerObject * self)6392 load_put(UnpicklerObject *self)
6393 {
6394 PyObject *key, *value;
6395 Py_ssize_t idx;
6396 Py_ssize_t len;
6397 char *s = NULL;
6398
6399 if ((len = _Unpickler_Readline(self, &s)) < 0)
6400 return -1;
6401 if (len < 2)
6402 return bad_readline();
6403 if (Py_SIZE(self->stack) <= self->stack->fence)
6404 return Pdata_stack_underflow(self->stack);
6405 value = self->stack->data[Py_SIZE(self->stack) - 1];
6406
6407 key = PyLong_FromString(s, NULL, 10);
6408 if (key == NULL)
6409 return -1;
6410 idx = PyLong_AsSsize_t(key);
6411 Py_DECREF(key);
6412 if (idx < 0) {
6413 if (!PyErr_Occurred())
6414 PyErr_SetString(PyExc_ValueError,
6415 "negative PUT argument");
6416 return -1;
6417 }
6418
6419 return _Unpickler_MemoPut(self, idx, value);
6420 }
6421
6422 static int
load_binput(UnpicklerObject * self)6423 load_binput(UnpicklerObject *self)
6424 {
6425 PyObject *value;
6426 Py_ssize_t idx;
6427 char *s;
6428
6429 if (_Unpickler_Read(self, &s, 1) < 0)
6430 return -1;
6431
6432 if (Py_SIZE(self->stack) <= self->stack->fence)
6433 return Pdata_stack_underflow(self->stack);
6434 value = self->stack->data[Py_SIZE(self->stack) - 1];
6435
6436 idx = Py_CHARMASK(s[0]);
6437
6438 return _Unpickler_MemoPut(self, idx, value);
6439 }
6440
6441 static int
load_long_binput(UnpicklerObject * self)6442 load_long_binput(UnpicklerObject *self)
6443 {
6444 PyObject *value;
6445 Py_ssize_t idx;
6446 char *s;
6447
6448 if (_Unpickler_Read(self, &s, 4) < 0)
6449 return -1;
6450
6451 if (Py_SIZE(self->stack) <= self->stack->fence)
6452 return Pdata_stack_underflow(self->stack);
6453 value = self->stack->data[Py_SIZE(self->stack) - 1];
6454
6455 idx = calc_binsize(s, 4);
6456 if (idx < 0) {
6457 PyErr_SetString(PyExc_ValueError,
6458 "negative LONG_BINPUT argument");
6459 return -1;
6460 }
6461
6462 return _Unpickler_MemoPut(self, idx, value);
6463 }
6464
6465 static int
load_memoize(UnpicklerObject * self)6466 load_memoize(UnpicklerObject *self)
6467 {
6468 PyObject *value;
6469
6470 if (Py_SIZE(self->stack) <= self->stack->fence)
6471 return Pdata_stack_underflow(self->stack);
6472 value = self->stack->data[Py_SIZE(self->stack) - 1];
6473
6474 return _Unpickler_MemoPut(self, self->memo_len, value);
6475 }
6476
6477 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6478 do_append(UnpicklerObject *self, Py_ssize_t x)
6479 {
6480 PyObject *value;
6481 PyObject *slice;
6482 PyObject *list;
6483 PyObject *result;
6484 Py_ssize_t len, i;
6485
6486 len = Py_SIZE(self->stack);
6487 if (x > len || x <= self->stack->fence)
6488 return Pdata_stack_underflow(self->stack);
6489 if (len == x) /* nothing to do */
6490 return 0;
6491
6492 list = self->stack->data[x - 1];
6493
6494 if (PyList_CheckExact(list)) {
6495 Py_ssize_t list_len;
6496 int ret;
6497
6498 slice = Pdata_poplist(self->stack, x);
6499 if (!slice)
6500 return -1;
6501 list_len = PyList_GET_SIZE(list);
6502 ret = PyList_SetSlice(list, list_len, list_len, slice);
6503 Py_DECREF(slice);
6504 return ret;
6505 }
6506 else {
6507 PyObject *extend_func;
6508 _Py_IDENTIFIER(extend);
6509
6510 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6511 return -1;
6512 }
6513 if (extend_func != NULL) {
6514 slice = Pdata_poplist(self->stack, x);
6515 if (!slice) {
6516 Py_DECREF(extend_func);
6517 return -1;
6518 }
6519 result = _Pickle_FastCall(extend_func, slice);
6520 Py_DECREF(extend_func);
6521 if (result == NULL)
6522 return -1;
6523 Py_DECREF(result);
6524 }
6525 else {
6526 PyObject *append_func;
6527 _Py_IDENTIFIER(append);
6528
6529 /* Even if the PEP 307 requires extend() and append() methods,
6530 fall back on append() if the object has no extend() method
6531 for backward compatibility. */
6532 append_func = _PyObject_GetAttrId(list, &PyId_append);
6533 if (append_func == NULL)
6534 return -1;
6535 for (i = x; i < len; i++) {
6536 value = self->stack->data[i];
6537 result = _Pickle_FastCall(append_func, value);
6538 if (result == NULL) {
6539 Pdata_clear(self->stack, i + 1);
6540 Py_SIZE(self->stack) = x;
6541 Py_DECREF(append_func);
6542 return -1;
6543 }
6544 Py_DECREF(result);
6545 }
6546 Py_SIZE(self->stack) = x;
6547 Py_DECREF(append_func);
6548 }
6549 }
6550
6551 return 0;
6552 }
6553
6554 static int
load_append(UnpicklerObject * self)6555 load_append(UnpicklerObject *self)
6556 {
6557 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6558 return Pdata_stack_underflow(self->stack);
6559 return do_append(self, Py_SIZE(self->stack) - 1);
6560 }
6561
6562 static int
load_appends(UnpicklerObject * self)6563 load_appends(UnpicklerObject *self)
6564 {
6565 Py_ssize_t i = marker(self);
6566 if (i < 0)
6567 return -1;
6568 return do_append(self, i);
6569 }
6570
6571 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6572 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6573 {
6574 PyObject *value, *key;
6575 PyObject *dict;
6576 Py_ssize_t len, i;
6577 int status = 0;
6578
6579 len = Py_SIZE(self->stack);
6580 if (x > len || x <= self->stack->fence)
6581 return Pdata_stack_underflow(self->stack);
6582 if (len == x) /* nothing to do */
6583 return 0;
6584 if ((len - x) % 2 != 0) {
6585 PickleState *st = _Pickle_GetGlobalState();
6586 /* Currupt or hostile pickle -- we never write one like this. */
6587 PyErr_SetString(st->UnpicklingError,
6588 "odd number of items for SETITEMS");
6589 return -1;
6590 }
6591
6592 /* Here, dict does not actually need to be a PyDict; it could be anything
6593 that supports the __setitem__ attribute. */
6594 dict = self->stack->data[x - 1];
6595
6596 for (i = x + 1; i < len; i += 2) {
6597 key = self->stack->data[i - 1];
6598 value = self->stack->data[i];
6599 if (PyObject_SetItem(dict, key, value) < 0) {
6600 status = -1;
6601 break;
6602 }
6603 }
6604
6605 Pdata_clear(self->stack, x);
6606 return status;
6607 }
6608
6609 static int
load_setitem(UnpicklerObject * self)6610 load_setitem(UnpicklerObject *self)
6611 {
6612 return do_setitems(self, Py_SIZE(self->stack) - 2);
6613 }
6614
6615 static int
load_setitems(UnpicklerObject * self)6616 load_setitems(UnpicklerObject *self)
6617 {
6618 Py_ssize_t i = marker(self);
6619 if (i < 0)
6620 return -1;
6621 return do_setitems(self, i);
6622 }
6623
6624 static int
load_additems(UnpicklerObject * self)6625 load_additems(UnpicklerObject *self)
6626 {
6627 PyObject *set;
6628 Py_ssize_t mark, len, i;
6629
6630 mark = marker(self);
6631 if (mark < 0)
6632 return -1;
6633 len = Py_SIZE(self->stack);
6634 if (mark > len || mark <= self->stack->fence)
6635 return Pdata_stack_underflow(self->stack);
6636 if (len == mark) /* nothing to do */
6637 return 0;
6638
6639 set = self->stack->data[mark - 1];
6640
6641 if (PySet_Check(set)) {
6642 PyObject *items;
6643 int status;
6644
6645 items = Pdata_poptuple(self->stack, mark);
6646 if (items == NULL)
6647 return -1;
6648
6649 status = _PySet_Update(set, items);
6650 Py_DECREF(items);
6651 return status;
6652 }
6653 else {
6654 PyObject *add_func;
6655 _Py_IDENTIFIER(add);
6656
6657 add_func = _PyObject_GetAttrId(set, &PyId_add);
6658 if (add_func == NULL)
6659 return -1;
6660 for (i = mark; i < len; i++) {
6661 PyObject *result;
6662 PyObject *item;
6663
6664 item = self->stack->data[i];
6665 result = _Pickle_FastCall(add_func, item);
6666 if (result == NULL) {
6667 Pdata_clear(self->stack, i + 1);
6668 Py_SIZE(self->stack) = mark;
6669 return -1;
6670 }
6671 Py_DECREF(result);
6672 }
6673 Py_SIZE(self->stack) = mark;
6674 }
6675
6676 return 0;
6677 }
6678
6679 static int
load_build(UnpicklerObject * self)6680 load_build(UnpicklerObject *self)
6681 {
6682 PyObject *state, *inst, *slotstate;
6683 PyObject *setstate;
6684 int status = 0;
6685 _Py_IDENTIFIER(__setstate__);
6686
6687 /* Stack is ... instance, state. We want to leave instance at
6688 * the stack top, possibly mutated via instance.__setstate__(state).
6689 */
6690 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6691 return Pdata_stack_underflow(self->stack);
6692
6693 PDATA_POP(self->stack, state);
6694 if (state == NULL)
6695 return -1;
6696
6697 inst = self->stack->data[Py_SIZE(self->stack) - 1];
6698
6699 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6700 Py_DECREF(state);
6701 return -1;
6702 }
6703 if (setstate != NULL) {
6704 PyObject *result;
6705
6706 /* The explicit __setstate__ is responsible for everything. */
6707 result = _Pickle_FastCall(setstate, state);
6708 Py_DECREF(setstate);
6709 if (result == NULL)
6710 return -1;
6711 Py_DECREF(result);
6712 return 0;
6713 }
6714
6715 /* A default __setstate__. First see whether state embeds a
6716 * slot state dict too (a proto 2 addition).
6717 */
6718 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6719 PyObject *tmp = state;
6720
6721 state = PyTuple_GET_ITEM(tmp, 0);
6722 slotstate = PyTuple_GET_ITEM(tmp, 1);
6723 Py_INCREF(state);
6724 Py_INCREF(slotstate);
6725 Py_DECREF(tmp);
6726 }
6727 else
6728 slotstate = NULL;
6729
6730 /* Set inst.__dict__ from the state dict (if any). */
6731 if (state != Py_None) {
6732 PyObject *dict;
6733 PyObject *d_key, *d_value;
6734 Py_ssize_t i;
6735 _Py_IDENTIFIER(__dict__);
6736
6737 if (!PyDict_Check(state)) {
6738 PickleState *st = _Pickle_GetGlobalState();
6739 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6740 goto error;
6741 }
6742 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
6743 if (dict == NULL)
6744 goto error;
6745
6746 i = 0;
6747 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6748 /* normally the keys for instance attributes are
6749 interned. we should try to do that here. */
6750 Py_INCREF(d_key);
6751 if (PyUnicode_CheckExact(d_key))
6752 PyUnicode_InternInPlace(&d_key);
6753 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6754 Py_DECREF(d_key);
6755 goto error;
6756 }
6757 Py_DECREF(d_key);
6758 }
6759 Py_DECREF(dict);
6760 }
6761
6762 /* Also set instance attributes from the slotstate dict (if any). */
6763 if (slotstate != NULL) {
6764 PyObject *d_key, *d_value;
6765 Py_ssize_t i;
6766
6767 if (!PyDict_Check(slotstate)) {
6768 PickleState *st = _Pickle_GetGlobalState();
6769 PyErr_SetString(st->UnpicklingError,
6770 "slot state is not a dictionary");
6771 goto error;
6772 }
6773 i = 0;
6774 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6775 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6776 goto error;
6777 }
6778 }
6779
6780 if (0) {
6781 error:
6782 status = -1;
6783 }
6784
6785 Py_DECREF(state);
6786 Py_XDECREF(slotstate);
6787 return status;
6788 }
6789
6790 static int
load_mark(UnpicklerObject * self)6791 load_mark(UnpicklerObject *self)
6792 {
6793
6794 /* Note that we split the (pickle.py) stack into two stacks, an
6795 * object stack and a mark stack. Here we push a mark onto the
6796 * mark stack.
6797 */
6798
6799 if (self->num_marks >= self->marks_size) {
6800 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6801 Py_ssize_t *marks_new = self->marks;
6802 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6803 if (marks_new == NULL) {
6804 PyErr_NoMemory();
6805 return -1;
6806 }
6807 self->marks = marks_new;
6808 self->marks_size = (Py_ssize_t)alloc;
6809 }
6810
6811 self->stack->mark_set = 1;
6812 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6813
6814 return 0;
6815 }
6816
6817 static int
load_reduce(UnpicklerObject * self)6818 load_reduce(UnpicklerObject *self)
6819 {
6820 PyObject *callable = NULL;
6821 PyObject *argtup = NULL;
6822 PyObject *obj = NULL;
6823
6824 PDATA_POP(self->stack, argtup);
6825 if (argtup == NULL)
6826 return -1;
6827 PDATA_POP(self->stack, callable);
6828 if (callable) {
6829 obj = PyObject_CallObject(callable, argtup);
6830 Py_DECREF(callable);
6831 }
6832 Py_DECREF(argtup);
6833
6834 if (obj == NULL)
6835 return -1;
6836
6837 PDATA_PUSH(self->stack, obj, -1);
6838 return 0;
6839 }
6840
6841 /* Just raises an error if we don't know the protocol specified. PROTO
6842 * is the first opcode for protocols >= 2.
6843 */
6844 static int
load_proto(UnpicklerObject * self)6845 load_proto(UnpicklerObject *self)
6846 {
6847 char *s;
6848 int i;
6849
6850 if (_Unpickler_Read(self, &s, 1) < 0)
6851 return -1;
6852
6853 i = (unsigned char)s[0];
6854 if (i <= HIGHEST_PROTOCOL) {
6855 self->proto = i;
6856 return 0;
6857 }
6858
6859 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6860 return -1;
6861 }
6862
6863 static int
load_frame(UnpicklerObject * self)6864 load_frame(UnpicklerObject *self)
6865 {
6866 char *s;
6867 Py_ssize_t frame_len;
6868
6869 if (_Unpickler_Read(self, &s, 8) < 0)
6870 return -1;
6871
6872 frame_len = calc_binsize(s, 8);
6873 if (frame_len < 0) {
6874 PyErr_Format(PyExc_OverflowError,
6875 "FRAME length exceeds system's maximum of %zd bytes",
6876 PY_SSIZE_T_MAX);
6877 return -1;
6878 }
6879
6880 if (_Unpickler_Read(self, &s, frame_len) < 0)
6881 return -1;
6882
6883 /* Rewind to start of frame */
6884 self->next_read_idx -= frame_len;
6885 return 0;
6886 }
6887
6888 static PyObject *
load(UnpicklerObject * self)6889 load(UnpicklerObject *self)
6890 {
6891 PyObject *value = NULL;
6892 char *s = NULL;
6893
6894 self->num_marks = 0;
6895 self->stack->mark_set = 0;
6896 self->stack->fence = 0;
6897 self->proto = 0;
6898 if (Py_SIZE(self->stack))
6899 Pdata_clear(self->stack, 0);
6900
6901 /* Convenient macros for the dispatch while-switch loop just below. */
6902 #define OP(opcode, load_func) \
6903 case opcode: if (load_func(self) < 0) break; continue;
6904
6905 #define OP_ARG(opcode, load_func, arg) \
6906 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6907
6908 while (1) {
6909 if (_Unpickler_Read(self, &s, 1) < 0) {
6910 PickleState *st = _Pickle_GetGlobalState();
6911 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6912 PyErr_Format(PyExc_EOFError, "Ran out of input");
6913 }
6914 return NULL;
6915 }
6916
6917 switch ((enum opcode)s[0]) {
6918 OP(NONE, load_none)
6919 OP(BININT, load_binint)
6920 OP(BININT1, load_binint1)
6921 OP(BININT2, load_binint2)
6922 OP(INT, load_int)
6923 OP(LONG, load_long)
6924 OP_ARG(LONG1, load_counted_long, 1)
6925 OP_ARG(LONG4, load_counted_long, 4)
6926 OP(FLOAT, load_float)
6927 OP(BINFLOAT, load_binfloat)
6928 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6929 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6930 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6931 OP(BYTEARRAY8, load_counted_bytearray)
6932 OP(NEXT_BUFFER, load_next_buffer)
6933 OP(READONLY_BUFFER, load_readonly_buffer)
6934 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6935 OP_ARG(BINSTRING, load_counted_binstring, 4)
6936 OP(STRING, load_string)
6937 OP(UNICODE, load_unicode)
6938 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6939 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6940 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
6941 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6942 OP_ARG(TUPLE1, load_counted_tuple, 1)
6943 OP_ARG(TUPLE2, load_counted_tuple, 2)
6944 OP_ARG(TUPLE3, load_counted_tuple, 3)
6945 OP(TUPLE, load_tuple)
6946 OP(EMPTY_LIST, load_empty_list)
6947 OP(LIST, load_list)
6948 OP(EMPTY_DICT, load_empty_dict)
6949 OP(DICT, load_dict)
6950 OP(EMPTY_SET, load_empty_set)
6951 OP(ADDITEMS, load_additems)
6952 OP(FROZENSET, load_frozenset)
6953 OP(OBJ, load_obj)
6954 OP(INST, load_inst)
6955 OP(NEWOBJ, load_newobj)
6956 OP(NEWOBJ_EX, load_newobj_ex)
6957 OP(GLOBAL, load_global)
6958 OP(STACK_GLOBAL, load_stack_global)
6959 OP(APPEND, load_append)
6960 OP(APPENDS, load_appends)
6961 OP(BUILD, load_build)
6962 OP(DUP, load_dup)
6963 OP(BINGET, load_binget)
6964 OP(LONG_BINGET, load_long_binget)
6965 OP(GET, load_get)
6966 OP(MARK, load_mark)
6967 OP(BINPUT, load_binput)
6968 OP(LONG_BINPUT, load_long_binput)
6969 OP(PUT, load_put)
6970 OP(MEMOIZE, load_memoize)
6971 OP(POP, load_pop)
6972 OP(POP_MARK, load_pop_mark)
6973 OP(SETITEM, load_setitem)
6974 OP(SETITEMS, load_setitems)
6975 OP(PERSID, load_persid)
6976 OP(BINPERSID, load_binpersid)
6977 OP(REDUCE, load_reduce)
6978 OP(PROTO, load_proto)
6979 OP(FRAME, load_frame)
6980 OP_ARG(EXT1, load_extension, 1)
6981 OP_ARG(EXT2, load_extension, 2)
6982 OP_ARG(EXT4, load_extension, 4)
6983 OP_ARG(NEWTRUE, load_bool, Py_True)
6984 OP_ARG(NEWFALSE, load_bool, Py_False)
6985
6986 case STOP:
6987 break;
6988
6989 default:
6990 {
6991 PickleState *st = _Pickle_GetGlobalState();
6992 unsigned char c = (unsigned char) *s;
6993 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6994 PyErr_Format(st->UnpicklingError,
6995 "invalid load key, '%c'.", c);
6996 }
6997 else {
6998 PyErr_Format(st->UnpicklingError,
6999 "invalid load key, '\\x%02x'.", c);
7000 }
7001 return NULL;
7002 }
7003 }
7004
7005 break; /* and we are done! */
7006 }
7007
7008 if (PyErr_Occurred()) {
7009 return NULL;
7010 }
7011
7012 if (_Unpickler_SkipConsumed(self) < 0)
7013 return NULL;
7014
7015 PDATA_POP(self->stack, value);
7016 return value;
7017 }
7018
7019 /*[clinic input]
7020
7021 _pickle.Unpickler.load
7022
7023 Load a pickle.
7024
7025 Read a pickled object representation from the open file object given
7026 in the constructor, and return the reconstituted object hierarchy
7027 specified therein.
7028 [clinic start generated code]*/
7029
7030 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)7031 _pickle_Unpickler_load_impl(UnpicklerObject *self)
7032 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
7033 {
7034 UnpicklerObject *unpickler = (UnpicklerObject*)self;
7035
7036 /* Check whether the Unpickler was initialized correctly. This prevents
7037 segfaulting if a subclass overridden __init__ with a function that does
7038 not call Unpickler.__init__(). Here, we simply ensure that self->read
7039 is not NULL. */
7040 if (unpickler->read == NULL) {
7041 PickleState *st = _Pickle_GetGlobalState();
7042 PyErr_Format(st->UnpicklingError,
7043 "Unpickler.__init__() was not called by %s.__init__()",
7044 Py_TYPE(unpickler)->tp_name);
7045 return NULL;
7046 }
7047
7048 return load(unpickler);
7049 }
7050
7051 /* The name of find_class() is misleading. In newer pickle protocols, this
7052 function is used for loading any global (i.e., functions), not just
7053 classes. The name is kept only for backward compatibility. */
7054
7055 /*[clinic input]
7056
7057 _pickle.Unpickler.find_class
7058
7059 module_name: object
7060 global_name: object
7061 /
7062
7063 Return an object from a specified module.
7064
7065 If necessary, the module will be imported. Subclasses may override
7066 this method (e.g. to restrict unpickling of arbitrary classes and
7067 functions).
7068
7069 This method is called whenever a class or a function object is
7070 needed. Both arguments passed are str objects.
7071 [clinic start generated code]*/
7072
7073 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7074 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7075 PyObject *module_name,
7076 PyObject *global_name)
7077 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7078 {
7079 PyObject *global;
7080 PyObject *module;
7081
7082 if (PySys_Audit("pickle.find_class", "OO",
7083 module_name, global_name) < 0) {
7084 return NULL;
7085 }
7086
7087 /* Try to map the old names used in Python 2.x to the new ones used in
7088 Python 3.x. We do this only with old pickle protocols and when the
7089 user has not disabled the feature. */
7090 if (self->proto < 3 && self->fix_imports) {
7091 PyObject *key;
7092 PyObject *item;
7093 PickleState *st = _Pickle_GetGlobalState();
7094
7095 /* Check if the global (i.e., a function or a class) was renamed
7096 or moved to another module. */
7097 key = PyTuple_Pack(2, module_name, global_name);
7098 if (key == NULL)
7099 return NULL;
7100 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7101 Py_DECREF(key);
7102 if (item) {
7103 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7104 PyErr_Format(PyExc_RuntimeError,
7105 "_compat_pickle.NAME_MAPPING values should be "
7106 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7107 return NULL;
7108 }
7109 module_name = PyTuple_GET_ITEM(item, 0);
7110 global_name = PyTuple_GET_ITEM(item, 1);
7111 if (!PyUnicode_Check(module_name) ||
7112 !PyUnicode_Check(global_name)) {
7113 PyErr_Format(PyExc_RuntimeError,
7114 "_compat_pickle.NAME_MAPPING values should be "
7115 "pairs of str, not (%.200s, %.200s)",
7116 Py_TYPE(module_name)->tp_name,
7117 Py_TYPE(global_name)->tp_name);
7118 return NULL;
7119 }
7120 }
7121 else if (PyErr_Occurred()) {
7122 return NULL;
7123 }
7124 else {
7125 /* Check if the module was renamed. */
7126 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7127 if (item) {
7128 if (!PyUnicode_Check(item)) {
7129 PyErr_Format(PyExc_RuntimeError,
7130 "_compat_pickle.IMPORT_MAPPING values should be "
7131 "strings, not %.200s", Py_TYPE(item)->tp_name);
7132 return NULL;
7133 }
7134 module_name = item;
7135 }
7136 else if (PyErr_Occurred()) {
7137 return NULL;
7138 }
7139 }
7140 }
7141
7142 /*
7143 * we don't use PyImport_GetModule here, because it can return partially-
7144 * initialised modules, which then cause the getattribute to fail.
7145 */
7146 module = PyImport_Import(module_name);
7147 if (module == NULL) {
7148 return NULL;
7149 }
7150 global = getattribute(module, global_name, self->proto >= 4);
7151 Py_DECREF(module);
7152 return global;
7153 }
7154
7155 /*[clinic input]
7156
7157 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7158
7159 Returns size in memory, in bytes.
7160 [clinic start generated code]*/
7161
7162 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7163 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7164 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7165 {
7166 Py_ssize_t res;
7167
7168 res = _PyObject_SIZE(Py_TYPE(self));
7169 if (self->memo != NULL)
7170 res += self->memo_size * sizeof(PyObject *);
7171 if (self->marks != NULL)
7172 res += self->marks_size * sizeof(Py_ssize_t);
7173 if (self->input_line != NULL)
7174 res += strlen(self->input_line) + 1;
7175 if (self->encoding != NULL)
7176 res += strlen(self->encoding) + 1;
7177 if (self->errors != NULL)
7178 res += strlen(self->errors) + 1;
7179 return res;
7180 }
7181
7182 static struct PyMethodDef Unpickler_methods[] = {
7183 _PICKLE_UNPICKLER_LOAD_METHODDEF
7184 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
7185 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
7186 {NULL, NULL} /* sentinel */
7187 };
7188
7189 static void
Unpickler_dealloc(UnpicklerObject * self)7190 Unpickler_dealloc(UnpicklerObject *self)
7191 {
7192 PyObject_GC_UnTrack((PyObject *)self);
7193 Py_XDECREF(self->readline);
7194 Py_XDECREF(self->readinto);
7195 Py_XDECREF(self->read);
7196 Py_XDECREF(self->peek);
7197 Py_XDECREF(self->stack);
7198 Py_XDECREF(self->pers_func);
7199 Py_XDECREF(self->buffers);
7200 if (self->buffer.buf != NULL) {
7201 PyBuffer_Release(&self->buffer);
7202 self->buffer.buf = NULL;
7203 }
7204
7205 _Unpickler_MemoCleanup(self);
7206 PyMem_Free(self->marks);
7207 PyMem_Free(self->input_line);
7208 PyMem_Free(self->encoding);
7209 PyMem_Free(self->errors);
7210
7211 Py_TYPE(self)->tp_free((PyObject *)self);
7212 }
7213
7214 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7215 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7216 {
7217 Py_VISIT(self->readline);
7218 Py_VISIT(self->readinto);
7219 Py_VISIT(self->read);
7220 Py_VISIT(self->peek);
7221 Py_VISIT(self->stack);
7222 Py_VISIT(self->pers_func);
7223 Py_VISIT(self->buffers);
7224 return 0;
7225 }
7226
7227 static int
Unpickler_clear(UnpicklerObject * self)7228 Unpickler_clear(UnpicklerObject *self)
7229 {
7230 Py_CLEAR(self->readline);
7231 Py_CLEAR(self->readinto);
7232 Py_CLEAR(self->read);
7233 Py_CLEAR(self->peek);
7234 Py_CLEAR(self->stack);
7235 Py_CLEAR(self->pers_func);
7236 Py_CLEAR(self->buffers);
7237 if (self->buffer.buf != NULL) {
7238 PyBuffer_Release(&self->buffer);
7239 self->buffer.buf = NULL;
7240 }
7241
7242 _Unpickler_MemoCleanup(self);
7243 PyMem_Free(self->marks);
7244 self->marks = NULL;
7245 PyMem_Free(self->input_line);
7246 self->input_line = NULL;
7247 PyMem_Free(self->encoding);
7248 self->encoding = NULL;
7249 PyMem_Free(self->errors);
7250 self->errors = NULL;
7251
7252 return 0;
7253 }
7254
7255 /*[clinic input]
7256
7257 _pickle.Unpickler.__init__
7258
7259 file: object
7260 *
7261 fix_imports: bool = True
7262 encoding: str = 'ASCII'
7263 errors: str = 'strict'
7264 buffers: object(c_default="NULL") = ()
7265
7266 This takes a binary file for reading a pickle data stream.
7267
7268 The protocol version of the pickle is detected automatically, so no
7269 protocol argument is needed. Bytes past the pickled object's
7270 representation are ignored.
7271
7272 The argument *file* must have two methods, a read() method that takes
7273 an integer argument, and a readline() method that requires no
7274 arguments. Both methods should return bytes. Thus *file* can be a
7275 binary file object opened for reading, an io.BytesIO object, or any
7276 other custom object that meets this interface.
7277
7278 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7279 which are used to control compatibility support for pickle stream
7280 generated by Python 2. If *fix_imports* is True, pickle will try to
7281 map the old Python 2 names to the new names used in Python 3. The
7282 *encoding* and *errors* tell pickle how to decode 8-bit string
7283 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7284 respectively. The *encoding* can be 'bytes' to read these 8-bit
7285 string instances as bytes objects.
7286 [clinic start generated code]*/
7287
7288 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7289 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7290 int fix_imports, const char *encoding,
7291 const char *errors, PyObject *buffers)
7292 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7293 {
7294 _Py_IDENTIFIER(persistent_load);
7295
7296 /* In case of multiple __init__() calls, clear previous content. */
7297 if (self->read != NULL)
7298 (void)Unpickler_clear(self);
7299
7300 if (_Unpickler_SetInputStream(self, file) < 0)
7301 return -1;
7302
7303 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7304 return -1;
7305
7306 if (_Unpickler_SetBuffers(self, buffers) < 0)
7307 return -1;
7308
7309 self->fix_imports = fix_imports;
7310
7311 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7312 &self->pers_func, &self->pers_func_self) < 0)
7313 {
7314 return -1;
7315 }
7316
7317 self->stack = (Pdata *)Pdata_New();
7318 if (self->stack == NULL)
7319 return -1;
7320
7321 self->memo_size = 32;
7322 self->memo = _Unpickler_NewMemo(self->memo_size);
7323 if (self->memo == NULL)
7324 return -1;
7325
7326 self->proto = 0;
7327
7328 return 0;
7329 }
7330
7331
7332 /* Define a proxy object for the Unpickler's internal memo object. This is to
7333 * avoid breaking code like:
7334 * unpickler.memo.clear()
7335 * and
7336 * unpickler.memo = saved_memo
7337 * Is this a good idea? Not really, but we don't want to break code that uses
7338 * it. Note that we don't implement the entire mapping API here. This is
7339 * intentional, as these should be treated as black-box implementation details.
7340 *
7341 * We do, however, have to implement pickling/unpickling support because of
7342 * real-world code like cvs2svn.
7343 */
7344
7345 /*[clinic input]
7346 _pickle.UnpicklerMemoProxy.clear
7347
7348 Remove all items from memo.
7349 [clinic start generated code]*/
7350
7351 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7352 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7353 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7354 {
7355 _Unpickler_MemoCleanup(self->unpickler);
7356 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7357 if (self->unpickler->memo == NULL)
7358 return NULL;
7359 Py_RETURN_NONE;
7360 }
7361
7362 /*[clinic input]
7363 _pickle.UnpicklerMemoProxy.copy
7364
7365 Copy the memo to a new object.
7366 [clinic start generated code]*/
7367
7368 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7369 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7370 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7371 {
7372 size_t i;
7373 PyObject *new_memo = PyDict_New();
7374 if (new_memo == NULL)
7375 return NULL;
7376
7377 for (i = 0; i < self->unpickler->memo_size; i++) {
7378 int status;
7379 PyObject *key, *value;
7380
7381 value = self->unpickler->memo[i];
7382 if (value == NULL)
7383 continue;
7384
7385 key = PyLong_FromSsize_t(i);
7386 if (key == NULL)
7387 goto error;
7388 status = PyDict_SetItem(new_memo, key, value);
7389 Py_DECREF(key);
7390 if (status < 0)
7391 goto error;
7392 }
7393 return new_memo;
7394
7395 error:
7396 Py_DECREF(new_memo);
7397 return NULL;
7398 }
7399
7400 /*[clinic input]
7401 _pickle.UnpicklerMemoProxy.__reduce__
7402
7403 Implement pickling support.
7404 [clinic start generated code]*/
7405
7406 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7407 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7408 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7409 {
7410 PyObject *reduce_value;
7411 PyObject *constructor_args;
7412 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7413 if (contents == NULL)
7414 return NULL;
7415
7416 reduce_value = PyTuple_New(2);
7417 if (reduce_value == NULL) {
7418 Py_DECREF(contents);
7419 return NULL;
7420 }
7421 constructor_args = PyTuple_New(1);
7422 if (constructor_args == NULL) {
7423 Py_DECREF(contents);
7424 Py_DECREF(reduce_value);
7425 return NULL;
7426 }
7427 PyTuple_SET_ITEM(constructor_args, 0, contents);
7428 Py_INCREF((PyObject *)&PyDict_Type);
7429 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7430 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7431 return reduce_value;
7432 }
7433
7434 static PyMethodDef unpicklerproxy_methods[] = {
7435 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7436 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7437 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7438 {NULL, NULL} /* sentinel */
7439 };
7440
7441 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7442 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7443 {
7444 PyObject_GC_UnTrack(self);
7445 Py_XDECREF(self->unpickler);
7446 PyObject_GC_Del((PyObject *)self);
7447 }
7448
7449 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7450 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7451 visitproc visit, void *arg)
7452 {
7453 Py_VISIT(self->unpickler);
7454 return 0;
7455 }
7456
7457 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7458 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7459 {
7460 Py_CLEAR(self->unpickler);
7461 return 0;
7462 }
7463
7464 static PyTypeObject UnpicklerMemoProxyType = {
7465 PyVarObject_HEAD_INIT(NULL, 0)
7466 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7467 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7468 0,
7469 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
7470 0, /* tp_vectorcall_offset */
7471 0, /* tp_getattr */
7472 0, /* tp_setattr */
7473 0, /* tp_as_async */
7474 0, /* tp_repr */
7475 0, /* tp_as_number */
7476 0, /* tp_as_sequence */
7477 0, /* tp_as_mapping */
7478 PyObject_HashNotImplemented, /* tp_hash */
7479 0, /* tp_call */
7480 0, /* tp_str */
7481 PyObject_GenericGetAttr, /* tp_getattro */
7482 PyObject_GenericSetAttr, /* tp_setattro */
7483 0, /* tp_as_buffer */
7484 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7485 0, /* tp_doc */
7486 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7487 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7488 0, /* tp_richcompare */
7489 0, /* tp_weaklistoffset */
7490 0, /* tp_iter */
7491 0, /* tp_iternext */
7492 unpicklerproxy_methods, /* tp_methods */
7493 };
7494
7495 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7496 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7497 {
7498 UnpicklerMemoProxyObject *self;
7499
7500 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7501 &UnpicklerMemoProxyType);
7502 if (self == NULL)
7503 return NULL;
7504 Py_INCREF(unpickler);
7505 self->unpickler = unpickler;
7506 PyObject_GC_Track(self);
7507 return (PyObject *)self;
7508 }
7509
7510 /*****************************************************************************/
7511
7512
7513 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7514 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7515 {
7516 return UnpicklerMemoProxy_New(self);
7517 }
7518
7519 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7520 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7521 {
7522 PyObject **new_memo;
7523 size_t new_memo_size = 0;
7524
7525 if (obj == NULL) {
7526 PyErr_SetString(PyExc_TypeError,
7527 "attribute deletion is not supported");
7528 return -1;
7529 }
7530
7531 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7532 UnpicklerObject *unpickler =
7533 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7534
7535 new_memo_size = unpickler->memo_size;
7536 new_memo = _Unpickler_NewMemo(new_memo_size);
7537 if (new_memo == NULL)
7538 return -1;
7539
7540 for (size_t i = 0; i < new_memo_size; i++) {
7541 Py_XINCREF(unpickler->memo[i]);
7542 new_memo[i] = unpickler->memo[i];
7543 }
7544 }
7545 else if (PyDict_Check(obj)) {
7546 Py_ssize_t i = 0;
7547 PyObject *key, *value;
7548
7549 new_memo_size = PyDict_GET_SIZE(obj);
7550 new_memo = _Unpickler_NewMemo(new_memo_size);
7551 if (new_memo == NULL)
7552 return -1;
7553
7554 while (PyDict_Next(obj, &i, &key, &value)) {
7555 Py_ssize_t idx;
7556 if (!PyLong_Check(key)) {
7557 PyErr_SetString(PyExc_TypeError,
7558 "memo key must be integers");
7559 goto error;
7560 }
7561 idx = PyLong_AsSsize_t(key);
7562 if (idx == -1 && PyErr_Occurred())
7563 goto error;
7564 if (idx < 0) {
7565 PyErr_SetString(PyExc_ValueError,
7566 "memo key must be positive integers.");
7567 goto error;
7568 }
7569 if (_Unpickler_MemoPut(self, idx, value) < 0)
7570 goto error;
7571 }
7572 }
7573 else {
7574 PyErr_Format(PyExc_TypeError,
7575 "'memo' attribute must be an UnpicklerMemoProxy object "
7576 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7577 return -1;
7578 }
7579
7580 _Unpickler_MemoCleanup(self);
7581 self->memo_size = new_memo_size;
7582 self->memo = new_memo;
7583
7584 return 0;
7585
7586 error:
7587 if (new_memo_size) {
7588 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7589 Py_XDECREF(new_memo[i]);
7590 }
7591 PyMem_FREE(new_memo);
7592 }
7593 return -1;
7594 }
7595
7596 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7597 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7598 {
7599 if (self->pers_func == NULL) {
7600 PyErr_SetString(PyExc_AttributeError, "persistent_load");
7601 return NULL;
7602 }
7603 return reconstruct_method(self->pers_func, self->pers_func_self);
7604 }
7605
7606 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7607 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7608 {
7609 if (value == NULL) {
7610 PyErr_SetString(PyExc_TypeError,
7611 "attribute deletion is not supported");
7612 return -1;
7613 }
7614 if (!PyCallable_Check(value)) {
7615 PyErr_SetString(PyExc_TypeError,
7616 "persistent_load must be a callable taking "
7617 "one argument");
7618 return -1;
7619 }
7620
7621 self->pers_func_self = NULL;
7622 Py_INCREF(value);
7623 Py_XSETREF(self->pers_func, value);
7624
7625 return 0;
7626 }
7627
7628 static PyGetSetDef Unpickler_getsets[] = {
7629 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7630 {"persistent_load", (getter)Unpickler_get_persload,
7631 (setter)Unpickler_set_persload},
7632 {NULL}
7633 };
7634
7635 static PyTypeObject Unpickler_Type = {
7636 PyVarObject_HEAD_INIT(NULL, 0)
7637 "_pickle.Unpickler", /*tp_name*/
7638 sizeof(UnpicklerObject), /*tp_basicsize*/
7639 0, /*tp_itemsize*/
7640 (destructor)Unpickler_dealloc, /*tp_dealloc*/
7641 0, /*tp_vectorcall_offset*/
7642 0, /*tp_getattr*/
7643 0, /*tp_setattr*/
7644 0, /*tp_as_async*/
7645 0, /*tp_repr*/
7646 0, /*tp_as_number*/
7647 0, /*tp_as_sequence*/
7648 0, /*tp_as_mapping*/
7649 0, /*tp_hash*/
7650 0, /*tp_call*/
7651 0, /*tp_str*/
7652 0, /*tp_getattro*/
7653 0, /*tp_setattro*/
7654 0, /*tp_as_buffer*/
7655 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7656 _pickle_Unpickler___init____doc__, /*tp_doc*/
7657 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7658 (inquiry)Unpickler_clear, /*tp_clear*/
7659 0, /*tp_richcompare*/
7660 0, /*tp_weaklistoffset*/
7661 0, /*tp_iter*/
7662 0, /*tp_iternext*/
7663 Unpickler_methods, /*tp_methods*/
7664 0, /*tp_members*/
7665 Unpickler_getsets, /*tp_getset*/
7666 0, /*tp_base*/
7667 0, /*tp_dict*/
7668 0, /*tp_descr_get*/
7669 0, /*tp_descr_set*/
7670 0, /*tp_dictoffset*/
7671 _pickle_Unpickler___init__, /*tp_init*/
7672 PyType_GenericAlloc, /*tp_alloc*/
7673 PyType_GenericNew, /*tp_new*/
7674 PyObject_GC_Del, /*tp_free*/
7675 0, /*tp_is_gc*/
7676 };
7677
7678 /*[clinic input]
7679
7680 _pickle.dump
7681
7682 obj: object
7683 file: object
7684 protocol: object = None
7685 *
7686 fix_imports: bool = True
7687 buffer_callback: object = None
7688
7689 Write a pickled representation of obj to the open file object file.
7690
7691 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7692 be more efficient.
7693
7694 The optional *protocol* argument tells the pickler to use the given
7695 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7696 protocol is 4. It was introduced in Python 3.4, and is incompatible
7697 with previous versions.
7698
7699 Specifying a negative protocol version selects the highest protocol
7700 version supported. The higher the protocol used, the more recent the
7701 version of Python needed to read the pickle produced.
7702
7703 The *file* argument must have a write() method that accepts a single
7704 bytes argument. It can thus be a file object opened for binary
7705 writing, an io.BytesIO instance, or any other custom object that meets
7706 this interface.
7707
7708 If *fix_imports* is True and protocol is less than 3, pickle will try
7709 to map the new Python 3 names to the old module names used in Python
7710 2, so that the pickle data stream is readable with Python 2.
7711
7712 If *buffer_callback* is None (the default), buffer views are serialized
7713 into *file* as part of the pickle stream. It is an error if
7714 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7715
7716 [clinic start generated code]*/
7717
7718 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7719 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7720 PyObject *protocol, int fix_imports,
7721 PyObject *buffer_callback)
7722 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7723 {
7724 PicklerObject *pickler = _Pickler_New();
7725
7726 if (pickler == NULL)
7727 return NULL;
7728
7729 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7730 goto error;
7731
7732 if (_Pickler_SetOutputStream(pickler, file) < 0)
7733 goto error;
7734
7735 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7736 goto error;
7737
7738 if (dump(pickler, obj) < 0)
7739 goto error;
7740
7741 if (_Pickler_FlushToFile(pickler) < 0)
7742 goto error;
7743
7744 Py_DECREF(pickler);
7745 Py_RETURN_NONE;
7746
7747 error:
7748 Py_XDECREF(pickler);
7749 return NULL;
7750 }
7751
7752 /*[clinic input]
7753
7754 _pickle.dumps
7755
7756 obj: object
7757 protocol: object = None
7758 *
7759 fix_imports: bool = True
7760 buffer_callback: object = None
7761
7762 Return the pickled representation of the object as a bytes object.
7763
7764 The optional *protocol* argument tells the pickler to use the given
7765 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7766 protocol is 4. It was introduced in Python 3.4, and is incompatible
7767 with previous versions.
7768
7769 Specifying a negative protocol version selects the highest protocol
7770 version supported. The higher the protocol used, the more recent the
7771 version of Python needed to read the pickle produced.
7772
7773 If *fix_imports* is True and *protocol* is less than 3, pickle will
7774 try to map the new Python 3 names to the old module names used in
7775 Python 2, so that the pickle data stream is readable with Python 2.
7776
If *buffer_callback* is None (the default), buffer views are serialized
into the returned bytes as part of the pickle stream.  It is an error if
*buffer_callback* is not None and *protocol* is None or smaller than 5.
7780
7781 [clinic start generated code]*/
7782
7783 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7784 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7785 int fix_imports, PyObject *buffer_callback)
7786 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7787 {
7788 PyObject *result;
7789 PicklerObject *pickler = _Pickler_New();
7790
7791 if (pickler == NULL)
7792 return NULL;
7793
7794 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7795 goto error;
7796
7797 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7798 goto error;
7799
7800 if (dump(pickler, obj) < 0)
7801 goto error;
7802
7803 result = _Pickler_GetString(pickler);
7804 Py_DECREF(pickler);
7805 return result;
7806
7807 error:
7808 Py_XDECREF(pickler);
7809 return NULL;
7810 }
7811
7812 /*[clinic input]
7813
7814 _pickle.load
7815
7816 file: object
7817 *
7818 fix_imports: bool = True
7819 encoding: str = 'ASCII'
7820 errors: str = 'strict'
7821 buffers: object(c_default="NULL") = ()
7822
7823 Read and return an object from the pickle data stored in a file.
7824
7825 This is equivalent to ``Unpickler(file).load()``, but may be more
7826 efficient.
7827
7828 The protocol version of the pickle is detected automatically, so no
7829 protocol argument is needed. Bytes past the pickled object's
7830 representation are ignored.
7831
7832 The argument *file* must have two methods, a read() method that takes
7833 an integer argument, and a readline() method that requires no
7834 arguments. Both methods should return bytes. Thus *file* can be a
7835 binary file object opened for reading, an io.BytesIO object, or any
7836 other custom object that meets this interface.
7837
7838 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7839 which are used to control compatibility support for pickle stream
7840 generated by Python 2. If *fix_imports* is True, pickle will try to
7841 map the old Python 2 names to the new names used in Python 3. The
7842 *encoding* and *errors* tell pickle how to decode 8-bit string
7843 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7844 respectively. The *encoding* can be 'bytes' to read these 8-bit
7845 string instances as bytes objects.
7846 [clinic start generated code]*/
7847
7848 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7849 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7850 const char *encoding, const char *errors,
7851 PyObject *buffers)
7852 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7853 {
7854 PyObject *result;
7855 UnpicklerObject *unpickler = _Unpickler_New();
7856
7857 if (unpickler == NULL)
7858 return NULL;
7859
7860 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7861 goto error;
7862
7863 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7864 goto error;
7865
7866 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7867 goto error;
7868
7869 unpickler->fix_imports = fix_imports;
7870
7871 result = load(unpickler);
7872 Py_DECREF(unpickler);
7873 return result;
7874
7875 error:
7876 Py_XDECREF(unpickler);
7877 return NULL;
7878 }
7879
7880 /*[clinic input]
7881
7882 _pickle.loads
7883
7884 data: object
7885 *
7886 fix_imports: bool = True
7887 encoding: str = 'ASCII'
7888 errors: str = 'strict'
7889 buffers: object(c_default="NULL") = ()
7890
7891 Read and return an object from the given pickle data.
7892
7893 The protocol version of the pickle is detected automatically, so no
7894 protocol argument is needed. Bytes past the pickled object's
7895 representation are ignored.
7896
7897 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7898 which are used to control compatibility support for pickle stream
7899 generated by Python 2. If *fix_imports* is True, pickle will try to
7900 map the old Python 2 names to the new names used in Python 3. The
7901 *encoding* and *errors* tell pickle how to decode 8-bit string
7902 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7903 respectively. The *encoding* can be 'bytes' to read these 8-bit
7904 string instances as bytes objects.
7905 [clinic start generated code]*/
7906
7907 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7908 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7909 const char *encoding, const char *errors,
7910 PyObject *buffers)
7911 /*[clinic end generated code: output=82ac1e6b588e6d02 input=9c2ab6a0960185ea]*/
7912 {
7913 PyObject *result;
7914 UnpicklerObject *unpickler = _Unpickler_New();
7915
7916 if (unpickler == NULL)
7917 return NULL;
7918
7919 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7920 goto error;
7921
7922 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7923 goto error;
7924
7925 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7926 goto error;
7927
7928 unpickler->fix_imports = fix_imports;
7929
7930 result = load(unpickler);
7931 Py_DECREF(unpickler);
7932 return result;
7933
7934 error:
7935 Py_XDECREF(unpickler);
7936 return NULL;
7937 }
7938
/* Module-level function table for _pickle, referenced from the module
   definition.  Each *_METHODDEF macro is defined outside this chunk
   (presumably by the Argument Clinic generated header) and contributes the
   entry for the matching dump/dumps/load/loads implementation above. */
static struct PyMethodDef pickle_methods[] = {
    _PICKLE_DUMP_METHODDEF
    _PICKLE_DUMPS_METHODDEF
    _PICKLE_LOAD_METHODDEF
    _PICKLE_LOADS_METHODDEF
    {NULL, NULL} /* sentinel */
};
7946
7947 static int
pickle_clear(PyObject * m)7948 pickle_clear(PyObject *m)
7949 {
7950 _Pickle_ClearState(_Pickle_GetState(m));
7951 return 0;
7952 }
7953
7954 static void
pickle_free(PyObject * m)7955 pickle_free(PyObject *m)
7956 {
7957 _Pickle_ClearState(_Pickle_GetState(m));
7958 }
7959
7960 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7961 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7962 {
7963 PickleState *st = _Pickle_GetState(m);
7964 Py_VISIT(st->PickleError);
7965 Py_VISIT(st->PicklingError);
7966 Py_VISIT(st->UnpicklingError);
7967 Py_VISIT(st->dispatch_table);
7968 Py_VISIT(st->extension_registry);
7969 Py_VISIT(st->extension_cache);
7970 Py_VISIT(st->inverted_registry);
7971 Py_VISIT(st->name_mapping_2to3);
7972 Py_VISIT(st->import_mapping_2to3);
7973 Py_VISIT(st->name_mapping_3to2);
7974 Py_VISIT(st->import_mapping_3to2);
7975 Py_VISIT(st->codecs_encode);
7976 Py_VISIT(st->getattr);
7977 Py_VISIT(st->partial);
7978 return 0;
7979 }
7980
7981 static struct PyModuleDef _picklemodule = {
7982 PyModuleDef_HEAD_INIT,
7983 "_pickle", /* m_name */
7984 pickle_module_doc, /* m_doc */
7985 sizeof(PickleState), /* m_size */
7986 pickle_methods, /* m_methods */
7987 NULL, /* m_reload */
7988 pickle_traverse, /* m_traverse */
7989 pickle_clear, /* m_clear */
7990 (freefunc)pickle_free /* m_free */
7991 };
7992
7993 PyMODINIT_FUNC
PyInit__pickle(void)7994 PyInit__pickle(void)
7995 {
7996 PyObject *m;
7997 PickleState *st;
7998
7999 m = PyState_FindModule(&_picklemodule);
8000 if (m) {
8001 Py_INCREF(m);
8002 return m;
8003 }
8004
8005 if (PyType_Ready(&Unpickler_Type) < 0)
8006 return NULL;
8007 if (PyType_Ready(&Pickler_Type) < 0)
8008 return NULL;
8009 if (PyType_Ready(&Pdata_Type) < 0)
8010 return NULL;
8011 if (PyType_Ready(&PicklerMemoProxyType) < 0)
8012 return NULL;
8013 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
8014 return NULL;
8015
8016 /* Create the module and add the functions. */
8017 m = PyModule_Create(&_picklemodule);
8018 if (m == NULL)
8019 return NULL;
8020
8021 /* Add types */
8022 Py_INCREF(&Pickler_Type);
8023 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
8024 return NULL;
8025 Py_INCREF(&Unpickler_Type);
8026 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
8027 return NULL;
8028 Py_INCREF(&PyPickleBuffer_Type);
8029 if (PyModule_AddObject(m, "PickleBuffer",
8030 (PyObject *)&PyPickleBuffer_Type) < 0)
8031 return NULL;
8032
8033 st = _Pickle_GetState(m);
8034
8035 /* Initialize the exceptions. */
8036 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
8037 if (st->PickleError == NULL)
8038 return NULL;
8039 st->PicklingError = \
8040 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
8041 if (st->PicklingError == NULL)
8042 return NULL;
8043 st->UnpicklingError = \
8044 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
8045 if (st->UnpicklingError == NULL)
8046 return NULL;
8047
8048 Py_INCREF(st->PickleError);
8049 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
8050 return NULL;
8051 Py_INCREF(st->PicklingError);
8052 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
8053 return NULL;
8054 Py_INCREF(st->UnpicklingError);
8055 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
8056 return NULL;
8057
8058 if (_Pickle_InitState(st) < 0)
8059 return NULL;
8060
8061 return m;
8062 }
8063