1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #define PY_SSIZE_T_CLEAN
10
11 #include "Python.h"
12 #include "pycore_call.h" // _PyObject_CallNoArgs()
13 #include "pycore_code.h" // _PyCode_New()
14 #include "pycore_floatobject.h" // _PyFloat_Pack8()
15 #include "pycore_hashtable.h" // _Py_hashtable_t
16 #include "code.h"
17 #include "marshal.h" // Py_MARSHAL_VERSION
18
19 /*[clinic input]
20 module marshal
21 [clinic start generated code]*/
22 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
23
24 #include "clinic/marshal.c.h"
25
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow.  We reduce the maximum depth for all Windows
 * releases to protect against this, i.e. the test below is deliberately
 * broader than the debug-only form
 *     #if defined(MS_WINDOWS) && defined(_DEBUG)
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
42
/* One-byte type codes.  Each marshalled object starts with one of these,
   possibly OR-ed with FLAG_REF (see W_TYPE).  TYPE_NULL stands for a NULL
   object pointer; the dict writer emits it as the end-of-dict marker. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact string/tuple codes; the writer only emits them for version >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
81
/* State of one marshal write operation.  Output goes either to a stdio
   stream (fp != NULL, buffered through buf) or into a growing bytes
   object (fp == NULL, str != NULL). */
typedef struct {
    FILE *fp;                   /* destination stream, or NULL when writing to str */
    int error;                  /* see WFERR_* values */
    int depth;                  /* current nesting level of w_object() calls */
    PyObject *str;              /* bytes object that backs buf when fp is NULL */
    char *ptr;                  /* next byte to write; set to NULL once a resize failed */
    const char *end;            /* one past the last usable byte of buf */
    char *buf;                  /* start of the output buffer */
    _Py_hashtable_t *hashtable; /* object -> reference index; only created for version >= 3 */
    int version;                /* marshal format version being written */
} WFILE;
93
/* Append the single byte `c` to the output of WFILE *p.
 *
 * When the buffer is full (ptr == end), w_reserve() is asked for one more
 * byte first; if that fails, the byte is dropped and nothing else is done
 * here.  `p` is evaluated more than once, so it must not have side effects.
 */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
98
99 static void
w_flush(WFILE * p)100 w_flush(WFILE *p)
101 {
102 assert(p->fp != NULL);
103 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
104 p->ptr = p->buf;
105 }
106
107 static int
w_reserve(WFILE * p,Py_ssize_t needed)108 w_reserve(WFILE *p, Py_ssize_t needed)
109 {
110 Py_ssize_t pos, size, delta;
111 if (p->ptr == NULL)
112 return 0; /* An error already occurred */
113 if (p->fp != NULL) {
114 w_flush(p);
115 return needed <= p->end - p->ptr;
116 }
117 assert(p->str != NULL);
118 pos = p->ptr - p->buf;
119 size = PyBytes_GET_SIZE(p->str);
120 if (size > 16*1024*1024)
121 delta = (size >> 3); /* 12.5% overallocation */
122 else
123 delta = size + 1024;
124 delta = Py_MAX(delta, needed);
125 if (delta > PY_SSIZE_T_MAX - size) {
126 p->error = WFERR_NOMEMORY;
127 return 0;
128 }
129 size += delta;
130 if (_PyBytes_Resize(&p->str, size) != 0) {
131 p->end = p->ptr = p->buf = NULL;
132 return 0;
133 }
134 else {
135 p->buf = PyBytes_AS_STRING(p->str);
136 p->ptr = p->buf + pos;
137 p->end = p->buf + size;
138 return 1;
139 }
140 }
141
142 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)143 w_string(const void *s, Py_ssize_t n, WFILE *p)
144 {
145 Py_ssize_t m;
146 if (!n || p->ptr == NULL)
147 return;
148 m = p->end - p->ptr;
149 if (p->fp != NULL) {
150 if (n <= m) {
151 memcpy(p->ptr, s, n);
152 p->ptr += n;
153 }
154 else {
155 w_flush(p);
156 fwrite(s, 1, n, p->fp);
157 }
158 }
159 else {
160 if (n <= m || w_reserve(p, n - m)) {
161 memcpy(p->ptr, s, n);
162 p->ptr += n;
163 }
164 }
165 }
166
167 static void
w_short(int x,WFILE * p)168 w_short(int x, WFILE *p)
169 {
170 w_byte((char)( x & 0xff), p);
171 w_byte((char)((x>> 8) & 0xff), p);
172 }
173
174 static void
w_long(long x,WFILE * p)175 w_long(long x, WFILE *p)
176 {
177 w_byte((char)( x & 0xff), p);
178 w_byte((char)((x>> 8) & 0xff), p);
179 w_byte((char)((x>>16) & 0xff), p);
180 w_byte((char)((x>>24) & 0xff), p);
181 }
182
/* Largest size/count that fits in the 4-byte signed field written by w_long. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size n as a 4-byte field.
 *
 * On builds where size_t is wider than 32 bits, a size above SIZE32_MAX
 * cannot be represented: the macro records WFERR_UNMARSHALLABLE and
 * RETURNS FROM THE ENCLOSING FUNCTION (which must therefore return void).
 *
 * The macro does not touch p->depth: w_object() increments and decrements
 * the depth counter around the whole write of an object, so decrementing
 * it here as well would leave it one too low after the error.
 */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
197
198 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)199 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
200 {
201 W_SIZE(n, p);
202 w_string(s, n, p);
203 }
204
205 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)206 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
207 {
208 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
209 w_string(s, n, p);
210 }
211
/* We assume that Python ints are stored internally in a base that is some
   power of 2**15; for the sake of portability we'll always read and write
   them in base exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of 15-bit marshal digits stored in one internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* Write the type code t, OR-ed with the reference flag.  This relies on a
   variable named `flag` being in scope where the macro is expanded. */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
227
228 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)229 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
230 {
231 Py_ssize_t i, j, n, l;
232 digit d;
233
234 W_TYPE(TYPE_LONG, p);
235 if (Py_SIZE(ob) == 0) {
236 w_long((long)0, p);
237 return;
238 }
239
240 /* set l to number of base PyLong_MARSHAL_BASE digits */
241 n = Py_ABS(Py_SIZE(ob));
242 l = (n-1) * PyLong_MARSHAL_RATIO;
243 d = ob->ob_digit[n-1];
244 assert(d != 0); /* a PyLong is always normalized */
245 do {
246 d >>= PyLong_MARSHAL_SHIFT;
247 l++;
248 } while (d != 0);
249 if (l > SIZE32_MAX) {
250 p->depth--;
251 p->error = WFERR_UNMARSHALLABLE;
252 return;
253 }
254 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
255
256 for (i=0; i < n-1; i++) {
257 d = ob->ob_digit[i];
258 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
259 w_short(d & PyLong_MARSHAL_MASK, p);
260 d >>= PyLong_MARSHAL_SHIFT;
261 }
262 assert (d == 0);
263 }
264 d = ob->ob_digit[n-1];
265 do {
266 w_short(d & PyLong_MARSHAL_MASK, p);
267 d >>= PyLong_MARSHAL_SHIFT;
268 } while (d != 0);
269 }
270
271 static void
w_float_bin(double v,WFILE * p)272 w_float_bin(double v, WFILE *p)
273 {
274 unsigned char buf[8];
275 if (_PyFloat_Pack8(v, buf, 1) < 0) {
276 p->error = WFERR_UNMARSHALLABLE;
277 return;
278 }
279 w_string(buf, 8, p);
280 }
281
282 static void
w_float_str(double v,WFILE * p)283 w_float_str(double v, WFILE *p)
284 {
285 char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
286 if (!buf) {
287 p->error = WFERR_NOMEMORY;
288 return;
289 }
290 w_short_pstring(buf, strlen(buf), p);
291 PyMem_Free(buf);
292 }
293
294 static int
w_ref(PyObject * v,char * flag,WFILE * p)295 w_ref(PyObject *v, char *flag, WFILE *p)
296 {
297 _Py_hashtable_entry_t *entry;
298 int w;
299
300 if (p->version < 3 || p->hashtable == NULL)
301 return 0; /* not writing object references */
302
303 /* if it has only one reference, it definitely isn't shared */
304 if (Py_REFCNT(v) == 1)
305 return 0;
306
307 entry = _Py_hashtable_get_entry(p->hashtable, v);
308 if (entry != NULL) {
309 /* write the reference index to the stream */
310 w = (int)(uintptr_t)entry->value;
311 /* we don't store "long" indices in the dict */
312 assert(0 <= w && w <= 0x7fffffff);
313 w_byte(TYPE_REF, p);
314 w_long(w, p);
315 return 1;
316 } else {
317 size_t s = p->hashtable->nentries;
318 /* we don't support long indices */
319 if (s >= 0x7fffffff) {
320 PyErr_SetString(PyExc_ValueError, "too many objects");
321 goto err;
322 }
323 w = (int)s;
324 Py_INCREF(v);
325 if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
326 Py_DECREF(v);
327 goto err;
328 }
329 *flag |= FLAG_REF;
330 return 0;
331 }
332 err:
333 p->error = WFERR_UNMARSHALLABLE;
334 return 1;
335 }
336
337 static void
338 w_complex_object(PyObject *v, char flag, WFILE *p);
339
340 static void
w_object(PyObject * v,WFILE * p)341 w_object(PyObject *v, WFILE *p)
342 {
343 char flag = '\0';
344
345 p->depth++;
346
347 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
348 p->error = WFERR_NESTEDTOODEEP;
349 }
350 else if (v == NULL) {
351 w_byte(TYPE_NULL, p);
352 }
353 else if (v == Py_None) {
354 w_byte(TYPE_NONE, p);
355 }
356 else if (v == PyExc_StopIteration) {
357 w_byte(TYPE_STOPITER, p);
358 }
359 else if (v == Py_Ellipsis) {
360 w_byte(TYPE_ELLIPSIS, p);
361 }
362 else if (v == Py_False) {
363 w_byte(TYPE_FALSE, p);
364 }
365 else if (v == Py_True) {
366 w_byte(TYPE_TRUE, p);
367 }
368 else if (!w_ref(v, &flag, p))
369 w_complex_object(v, flag, p);
370
371 p->depth--;
372 }
373
374 static void
w_complex_object(PyObject * v,char flag,WFILE * p)375 w_complex_object(PyObject *v, char flag, WFILE *p)
376 {
377 Py_ssize_t i, n;
378
379 if (PyLong_CheckExact(v)) {
380 int overflow;
381 long x = PyLong_AsLongAndOverflow(v, &overflow);
382 if (overflow) {
383 w_PyLong((PyLongObject *)v, flag, p);
384 }
385 else {
386 #if SIZEOF_LONG > 4
387 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
388 if (y && y != -1) {
389 /* Too large for TYPE_INT */
390 w_PyLong((PyLongObject*)v, flag, p);
391 }
392 else
393 #endif
394 {
395 W_TYPE(TYPE_INT, p);
396 w_long(x, p);
397 }
398 }
399 }
400 else if (PyFloat_CheckExact(v)) {
401 if (p->version > 1) {
402 W_TYPE(TYPE_BINARY_FLOAT, p);
403 w_float_bin(PyFloat_AS_DOUBLE(v), p);
404 }
405 else {
406 W_TYPE(TYPE_FLOAT, p);
407 w_float_str(PyFloat_AS_DOUBLE(v), p);
408 }
409 }
410 else if (PyComplex_CheckExact(v)) {
411 if (p->version > 1) {
412 W_TYPE(TYPE_BINARY_COMPLEX, p);
413 w_float_bin(PyComplex_RealAsDouble(v), p);
414 w_float_bin(PyComplex_ImagAsDouble(v), p);
415 }
416 else {
417 W_TYPE(TYPE_COMPLEX, p);
418 w_float_str(PyComplex_RealAsDouble(v), p);
419 w_float_str(PyComplex_ImagAsDouble(v), p);
420 }
421 }
422 else if (PyBytes_CheckExact(v)) {
423 W_TYPE(TYPE_STRING, p);
424 w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
425 }
426 else if (PyUnicode_CheckExact(v)) {
427 if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
428 int is_short = PyUnicode_GET_LENGTH(v) < 256;
429 if (is_short) {
430 if (PyUnicode_CHECK_INTERNED(v))
431 W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
432 else
433 W_TYPE(TYPE_SHORT_ASCII, p);
434 w_short_pstring(PyUnicode_1BYTE_DATA(v),
435 PyUnicode_GET_LENGTH(v), p);
436 }
437 else {
438 if (PyUnicode_CHECK_INTERNED(v))
439 W_TYPE(TYPE_ASCII_INTERNED, p);
440 else
441 W_TYPE(TYPE_ASCII, p);
442 w_pstring(PyUnicode_1BYTE_DATA(v),
443 PyUnicode_GET_LENGTH(v), p);
444 }
445 }
446 else {
447 PyObject *utf8;
448 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
449 if (utf8 == NULL) {
450 p->depth--;
451 p->error = WFERR_UNMARSHALLABLE;
452 return;
453 }
454 if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
455 W_TYPE(TYPE_INTERNED, p);
456 else
457 W_TYPE(TYPE_UNICODE, p);
458 w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
459 Py_DECREF(utf8);
460 }
461 }
462 else if (PyTuple_CheckExact(v)) {
463 n = PyTuple_GET_SIZE(v);
464 if (p->version >= 4 && n < 256) {
465 W_TYPE(TYPE_SMALL_TUPLE, p);
466 w_byte((unsigned char)n, p);
467 }
468 else {
469 W_TYPE(TYPE_TUPLE, p);
470 W_SIZE(n, p);
471 }
472 for (i = 0; i < n; i++) {
473 w_object(PyTuple_GET_ITEM(v, i), p);
474 }
475 }
476 else if (PyList_CheckExact(v)) {
477 W_TYPE(TYPE_LIST, p);
478 n = PyList_GET_SIZE(v);
479 W_SIZE(n, p);
480 for (i = 0; i < n; i++) {
481 w_object(PyList_GET_ITEM(v, i), p);
482 }
483 }
484 else if (PyDict_CheckExact(v)) {
485 Py_ssize_t pos;
486 PyObject *key, *value;
487 W_TYPE(TYPE_DICT, p);
488 /* This one is NULL object terminated! */
489 pos = 0;
490 while (PyDict_Next(v, &pos, &key, &value)) {
491 w_object(key, p);
492 w_object(value, p);
493 }
494 w_object((PyObject *)NULL, p);
495 }
496 else if (PyAnySet_CheckExact(v)) {
497 PyObject *value;
498 Py_ssize_t pos = 0;
499 Py_hash_t hash;
500
501 if (PyFrozenSet_CheckExact(v))
502 W_TYPE(TYPE_FROZENSET, p);
503 else
504 W_TYPE(TYPE_SET, p);
505 n = PySet_GET_SIZE(v);
506 W_SIZE(n, p);
507 // bpo-37596: To support reproducible builds, sets and frozensets need
508 // to have their elements serialized in a consistent order (even when
509 // they have been scrambled by hash randomization). To ensure this, we
510 // use an order equivalent to sorted(v, key=marshal.dumps):
511 PyObject *pairs = PyList_New(n);
512 if (pairs == NULL) {
513 p->error = WFERR_NOMEMORY;
514 return;
515 }
516 Py_ssize_t i = 0;
517 while (_PySet_NextEntry(v, &pos, &value, &hash)) {
518 PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
519 if (dump == NULL) {
520 p->error = WFERR_UNMARSHALLABLE;
521 Py_DECREF(pairs);
522 return;
523 }
524 PyObject *pair = PyTuple_Pack(2, dump, value);
525 Py_DECREF(dump);
526 if (pair == NULL) {
527 p->error = WFERR_NOMEMORY;
528 Py_DECREF(pairs);
529 return;
530 }
531 PyList_SET_ITEM(pairs, i++, pair);
532 }
533 assert(i == n);
534 if (PyList_Sort(pairs)) {
535 p->error = WFERR_NOMEMORY;
536 Py_DECREF(pairs);
537 return;
538 }
539 for (Py_ssize_t i = 0; i < n; i++) {
540 PyObject *pair = PyList_GET_ITEM(pairs, i);
541 value = PyTuple_GET_ITEM(pair, 1);
542 w_object(value, p);
543 }
544 Py_DECREF(pairs);
545 }
546 else if (PyCode_Check(v)) {
547 PyCodeObject *co = (PyCodeObject *)v;
548 W_TYPE(TYPE_CODE, p);
549 w_long(co->co_argcount, p);
550 w_long(co->co_posonlyargcount, p);
551 w_long(co->co_kwonlyargcount, p);
552 w_long(co->co_stacksize, p);
553 w_long(co->co_flags, p);
554 w_object(co->co_code, p);
555 w_object(co->co_consts, p);
556 w_object(co->co_names, p);
557 w_object(co->co_localsplusnames, p);
558 w_object(co->co_localspluskinds, p);
559 w_object(co->co_filename, p);
560 w_object(co->co_name, p);
561 w_object(co->co_qualname, p);
562 w_long(co->co_firstlineno, p);
563 w_object(co->co_linetable, p);
564 w_object(co->co_endlinetable, p);
565 w_object(co->co_columntable, p);
566 w_object(co->co_exceptiontable, p);
567 }
568 else if (PyObject_CheckBuffer(v)) {
569 /* Write unknown bytes-like objects as a bytes object */
570 Py_buffer view;
571 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
572 w_byte(TYPE_UNKNOWN, p);
573 p->depth--;
574 p->error = WFERR_UNMARSHALLABLE;
575 return;
576 }
577 W_TYPE(TYPE_STRING, p);
578 w_pstring(view.buf, view.len, p);
579 PyBuffer_Release(&view);
580 }
581 else {
582 W_TYPE(TYPE_UNKNOWN, p);
583 p->error = WFERR_UNMARSHALLABLE;
584 }
585 }
586
587 static void
w_decref_entry(void * key)588 w_decref_entry(void *key)
589 {
590 PyObject *entry_key = (PyObject *)key;
591 Py_XDECREF(entry_key);
592 }
593
594 static int
w_init_refs(WFILE * wf,int version)595 w_init_refs(WFILE *wf, int version)
596 {
597 if (version >= 3) {
598 wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
599 _Py_hashtable_compare_direct,
600 w_decref_entry, NULL, NULL);
601 if (wf->hashtable == NULL) {
602 PyErr_NoMemory();
603 return -1;
604 }
605 }
606 return 0;
607 }
608
609 static void
w_clear_refs(WFILE * wf)610 w_clear_refs(WFILE *wf)
611 {
612 if (wf->hashtable != NULL) {
613 _Py_hashtable_destroy(wf->hashtable);
614 }
615 }
616
617 /* version currently has no effect for writing ints. */
618 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)619 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
620 {
621 char buf[4];
622 WFILE wf;
623 memset(&wf, 0, sizeof(wf));
624 wf.fp = fp;
625 wf.ptr = wf.buf = buf;
626 wf.end = wf.ptr + sizeof(buf);
627 wf.error = WFERR_OK;
628 wf.version = version;
629 w_long(x, &wf);
630 w_flush(&wf);
631 }
632
633 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)634 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
635 {
636 char buf[BUFSIZ];
637 WFILE wf;
638 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
639 return; /* caller must check PyErr_Occurred() */
640 }
641 memset(&wf, 0, sizeof(wf));
642 wf.fp = fp;
643 wf.ptr = wf.buf = buf;
644 wf.end = wf.ptr + sizeof(buf);
645 wf.error = WFERR_OK;
646 wf.version = version;
647 if (w_init_refs(&wf, version)) {
648 return; /* caller must check PyErr_Occurred() */
649 }
650 w_object(x, &wf);
651 w_clear_refs(&wf);
652 w_flush(&wf);
653 }
654
/* State of one marshal read operation.  Data comes from memory
   (ptr != NULL), from a stream-like object (readable != NULL), or from a
   stdio stream (fp). */
typedef struct {
    FILE *fp;               /* stdio source, used when ptr and readable are both NULL */
    int depth;              /* current nesting level of r_object() calls */
    PyObject *readable;     /* Stream-like object being read from */
    const char *ptr;        /* next unread byte of in-memory data, or NULL */
    const char *end;        /* one past the last byte of in-memory data */
    char *buf;              /* scratch buffer that r_string() allocates on demand */
    Py_ssize_t buf_size;    /* current size of buf in bytes */
    PyObject *refs;         /* a list */
} RFILE;
665
666 static const char *
r_string(Py_ssize_t n,RFILE * p)667 r_string(Py_ssize_t n, RFILE *p)
668 {
669 Py_ssize_t read = -1;
670
671 if (p->ptr != NULL) {
672 /* Fast path for loads() */
673 const char *res = p->ptr;
674 Py_ssize_t left = p->end - p->ptr;
675 if (left < n) {
676 PyErr_SetString(PyExc_EOFError,
677 "marshal data too short");
678 return NULL;
679 }
680 p->ptr += n;
681 return res;
682 }
683 if (p->buf == NULL) {
684 p->buf = PyMem_Malloc(n);
685 if (p->buf == NULL) {
686 PyErr_NoMemory();
687 return NULL;
688 }
689 p->buf_size = n;
690 }
691 else if (p->buf_size < n) {
692 char *tmp = PyMem_Realloc(p->buf, n);
693 if (tmp == NULL) {
694 PyErr_NoMemory();
695 return NULL;
696 }
697 p->buf = tmp;
698 p->buf_size = n;
699 }
700
701 if (!p->readable) {
702 assert(p->fp != NULL);
703 read = fread(p->buf, 1, n, p->fp);
704 }
705 else {
706 _Py_IDENTIFIER(readinto);
707 PyObject *res, *mview;
708 Py_buffer buf;
709
710 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
711 return NULL;
712 mview = PyMemoryView_FromBuffer(&buf);
713 if (mview == NULL)
714 return NULL;
715
716 res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
717 if (res != NULL) {
718 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
719 Py_DECREF(res);
720 }
721 }
722 if (read != n) {
723 if (!PyErr_Occurred()) {
724 if (read > n)
725 PyErr_Format(PyExc_ValueError,
726 "read() returned too much data: "
727 "%zd bytes requested, %zd returned",
728 n, read);
729 else
730 PyErr_SetString(PyExc_EOFError,
731 "EOF read where not expected");
732 }
733 return NULL;
734 }
735 return p->buf;
736 }
737
738 static int
r_byte(RFILE * p)739 r_byte(RFILE *p)
740 {
741 int c = EOF;
742
743 if (p->ptr != NULL) {
744 if (p->ptr < p->end)
745 c = (unsigned char) *p->ptr++;
746 return c;
747 }
748 if (!p->readable) {
749 assert(p->fp);
750 c = getc(p->fp);
751 }
752 else {
753 const char *ptr = r_string(1, p);
754 if (ptr != NULL)
755 c = *(const unsigned char *) ptr;
756 }
757 return c;
758 }
759
760 static int
r_short(RFILE * p)761 r_short(RFILE *p)
762 {
763 short x = -1;
764 const unsigned char *buffer;
765
766 buffer = (const unsigned char *) r_string(2, p);
767 if (buffer != NULL) {
768 x = buffer[0];
769 x |= buffer[1] << 8;
770 /* Sign-extension, in case short greater than 16 bits */
771 x |= -(x & 0x8000);
772 }
773 return x;
774 }
775
776 static long
r_long(RFILE * p)777 r_long(RFILE *p)
778 {
779 long x = -1;
780 const unsigned char *buffer;
781
782 buffer = (const unsigned char *) r_string(4, p);
783 if (buffer != NULL) {
784 x = buffer[0];
785 x |= (long)buffer[1] << 8;
786 x |= (long)buffer[2] << 16;
787 x |= (long)buffer[3] << 24;
788 #if SIZEOF_LONG > 4
789 /* Sign extension for 64-bit machines */
790 x |= -(x & 0x80000000L);
791 #endif
792 }
793 return x;
794 }
795
796 /* r_long64 deals with the TYPE_INT64 code. */
797 static PyObject *
r_long64(RFILE * p)798 r_long64(RFILE *p)
799 {
800 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
801 if (buffer == NULL) {
802 return NULL;
803 }
804 return _PyLong_FromByteArray(buffer, 8,
805 1 /* little endian */,
806 1 /* signed */);
807 }
808
809 static PyObject *
r_PyLong(RFILE * p)810 r_PyLong(RFILE *p)
811 {
812 PyLongObject *ob;
813 long n, size, i;
814 int j, md, shorts_in_top_digit;
815 digit d;
816
817 n = r_long(p);
818 if (PyErr_Occurred())
819 return NULL;
820 if (n == 0)
821 return (PyObject *)_PyLong_New(0);
822 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
823 PyErr_SetString(PyExc_ValueError,
824 "bad marshal data (long size out of range)");
825 return NULL;
826 }
827
828 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
829 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
830 ob = _PyLong_New(size);
831 if (ob == NULL)
832 return NULL;
833
834 Py_SET_SIZE(ob, n > 0 ? size : -size);
835
836 for (i = 0; i < size-1; i++) {
837 d = 0;
838 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
839 md = r_short(p);
840 if (PyErr_Occurred()) {
841 Py_DECREF(ob);
842 return NULL;
843 }
844 if (md < 0 || md > PyLong_MARSHAL_BASE)
845 goto bad_digit;
846 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
847 }
848 ob->ob_digit[i] = d;
849 }
850
851 d = 0;
852 for (j=0; j < shorts_in_top_digit; j++) {
853 md = r_short(p);
854 if (PyErr_Occurred()) {
855 Py_DECREF(ob);
856 return NULL;
857 }
858 if (md < 0 || md > PyLong_MARSHAL_BASE)
859 goto bad_digit;
860 /* topmost marshal digit should be nonzero */
861 if (md == 0 && j == shorts_in_top_digit - 1) {
862 Py_DECREF(ob);
863 PyErr_SetString(PyExc_ValueError,
864 "bad marshal data (unnormalized long data)");
865 return NULL;
866 }
867 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
868 }
869 if (PyErr_Occurred()) {
870 Py_DECREF(ob);
871 return NULL;
872 }
873 /* top digit should be nonzero, else the resulting PyLong won't be
874 normalized */
875 ob->ob_digit[size-1] = d;
876 return (PyObject *)ob;
877 bad_digit:
878 Py_DECREF(ob);
879 PyErr_SetString(PyExc_ValueError,
880 "bad marshal data (digit out of range in long)");
881 return NULL;
882 }
883
884 static double
r_float_bin(RFILE * p)885 r_float_bin(RFILE *p)
886 {
887 const unsigned char *buf = (const unsigned char *) r_string(8, p);
888 if (buf == NULL)
889 return -1;
890 return _PyFloat_Unpack8(buf, 1);
891 }
892
893 /* Issue #33720: Disable inlining for reducing the C stack consumption
894 on PGO builds. */
895 Py_NO_INLINE static double
r_float_str(RFILE * p)896 r_float_str(RFILE *p)
897 {
898 int n;
899 char buf[256];
900 const char *ptr;
901 n = r_byte(p);
902 if (n == EOF) {
903 PyErr_SetString(PyExc_EOFError,
904 "EOF read where object expected");
905 return -1;
906 }
907 ptr = r_string(n, p);
908 if (ptr == NULL) {
909 return -1;
910 }
911 memcpy(buf, ptr, n);
912 buf[n] = '\0';
913 return PyOS_string_to_double(buf, NULL, NULL);
914 }
915
916 /* allocate the reflist index for a new object. Return -1 on failure */
917 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)918 r_ref_reserve(int flag, RFILE *p)
919 {
920 if (flag) { /* currently only FLAG_REF is defined */
921 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
922 if (idx >= 0x7ffffffe) {
923 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
924 return -1;
925 }
926 if (PyList_Append(p->refs, Py_None) < 0)
927 return -1;
928 return idx;
929 } else
930 return 0;
931 }
932
933 /* insert the new object 'o' to the reflist at previously
934 * allocated index 'idx'.
935 * 'o' can be NULL, in which case nothing is done.
936 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
937 * if 'o' was non-NULL, and the function fails, 'o' is released and
938 * NULL returned. This simplifies error checking at the call site since
939 * a single test for NULL for the function result is enough.
940 */
941 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)942 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
943 {
944 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
945 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
946 Py_INCREF(o);
947 PyList_SET_ITEM(p->refs, idx, o);
948 Py_DECREF(tmp);
949 }
950 return o;
951 }
952
953 /* combination of both above, used when an object can be
954 * created whenever it is seen in the file, as opposed to
955 * after having loaded its sub-objects.
956 */
957 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)958 r_ref(PyObject *o, int flag, RFILE *p)
959 {
960 assert(flag & FLAG_REF);
961 if (o == NULL)
962 return NULL;
963 if (PyList_Append(p->refs, o) < 0) {
964 Py_DECREF(o); /* release the new object */
965 return NULL;
966 }
967 return o;
968 }
969
970 static PyObject *
r_object(RFILE * p)971 r_object(RFILE *p)
972 {
973 /* NULL is a valid return value, it does not necessarily means that
974 an exception is set. */
975 PyObject *v, *v2;
976 Py_ssize_t idx = 0;
977 long i, n;
978 int type, code = r_byte(p);
979 int flag, is_interned = 0;
980 PyObject *retval = NULL;
981
982 if (code == EOF) {
983 PyErr_SetString(PyExc_EOFError,
984 "EOF read where object expected");
985 return NULL;
986 }
987
988 p->depth++;
989
990 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
991 p->depth--;
992 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
993 return NULL;
994 }
995
996 flag = code & FLAG_REF;
997 type = code & ~FLAG_REF;
998
999 #define R_REF(O) do{\
1000 if (flag) \
1001 O = r_ref(O, flag, p);\
1002 } while (0)
1003
1004 switch (type) {
1005
1006 case TYPE_NULL:
1007 break;
1008
1009 case TYPE_NONE:
1010 Py_INCREF(Py_None);
1011 retval = Py_None;
1012 break;
1013
1014 case TYPE_STOPITER:
1015 Py_INCREF(PyExc_StopIteration);
1016 retval = PyExc_StopIteration;
1017 break;
1018
1019 case TYPE_ELLIPSIS:
1020 Py_INCREF(Py_Ellipsis);
1021 retval = Py_Ellipsis;
1022 break;
1023
1024 case TYPE_FALSE:
1025 Py_INCREF(Py_False);
1026 retval = Py_False;
1027 break;
1028
1029 case TYPE_TRUE:
1030 Py_INCREF(Py_True);
1031 retval = Py_True;
1032 break;
1033
1034 case TYPE_INT:
1035 n = r_long(p);
1036 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1037 R_REF(retval);
1038 break;
1039
1040 case TYPE_INT64:
1041 retval = r_long64(p);
1042 R_REF(retval);
1043 break;
1044
1045 case TYPE_LONG:
1046 retval = r_PyLong(p);
1047 R_REF(retval);
1048 break;
1049
1050 case TYPE_FLOAT:
1051 {
1052 double x = r_float_str(p);
1053 if (x == -1.0 && PyErr_Occurred())
1054 break;
1055 retval = PyFloat_FromDouble(x);
1056 R_REF(retval);
1057 break;
1058 }
1059
1060 case TYPE_BINARY_FLOAT:
1061 {
1062 double x = r_float_bin(p);
1063 if (x == -1.0 && PyErr_Occurred())
1064 break;
1065 retval = PyFloat_FromDouble(x);
1066 R_REF(retval);
1067 break;
1068 }
1069
1070 case TYPE_COMPLEX:
1071 {
1072 Py_complex c;
1073 c.real = r_float_str(p);
1074 if (c.real == -1.0 && PyErr_Occurred())
1075 break;
1076 c.imag = r_float_str(p);
1077 if (c.imag == -1.0 && PyErr_Occurred())
1078 break;
1079 retval = PyComplex_FromCComplex(c);
1080 R_REF(retval);
1081 break;
1082 }
1083
1084 case TYPE_BINARY_COMPLEX:
1085 {
1086 Py_complex c;
1087 c.real = r_float_bin(p);
1088 if (c.real == -1.0 && PyErr_Occurred())
1089 break;
1090 c.imag = r_float_bin(p);
1091 if (c.imag == -1.0 && PyErr_Occurred())
1092 break;
1093 retval = PyComplex_FromCComplex(c);
1094 R_REF(retval);
1095 break;
1096 }
1097
1098 case TYPE_STRING:
1099 {
1100 const char *ptr;
1101 n = r_long(p);
1102 if (PyErr_Occurred())
1103 break;
1104 if (n < 0 || n > SIZE32_MAX) {
1105 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1106 break;
1107 }
1108 v = PyBytes_FromStringAndSize((char *)NULL, n);
1109 if (v == NULL)
1110 break;
1111 ptr = r_string(n, p);
1112 if (ptr == NULL) {
1113 Py_DECREF(v);
1114 break;
1115 }
1116 memcpy(PyBytes_AS_STRING(v), ptr, n);
1117 retval = v;
1118 R_REF(retval);
1119 break;
1120 }
1121
1122 case TYPE_ASCII_INTERNED:
1123 is_interned = 1;
1124 /* fall through */
1125 case TYPE_ASCII:
1126 n = r_long(p);
1127 if (PyErr_Occurred())
1128 break;
1129 if (n < 0 || n > SIZE32_MAX) {
1130 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1131 break;
1132 }
1133 goto _read_ascii;
1134
1135 case TYPE_SHORT_ASCII_INTERNED:
1136 is_interned = 1;
1137 /* fall through */
1138 case TYPE_SHORT_ASCII:
1139 n = r_byte(p);
1140 if (n == EOF) {
1141 PyErr_SetString(PyExc_EOFError,
1142 "EOF read where object expected");
1143 break;
1144 }
1145 _read_ascii:
1146 {
1147 const char *ptr;
1148 ptr = r_string(n, p);
1149 if (ptr == NULL)
1150 break;
1151 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1152 if (v == NULL)
1153 break;
1154 if (is_interned)
1155 PyUnicode_InternInPlace(&v);
1156 retval = v;
1157 R_REF(retval);
1158 break;
1159 }
1160
1161 case TYPE_INTERNED:
1162 is_interned = 1;
1163 /* fall through */
1164 case TYPE_UNICODE:
1165 {
1166 const char *buffer;
1167
1168 n = r_long(p);
1169 if (PyErr_Occurred())
1170 break;
1171 if (n < 0 || n > SIZE32_MAX) {
1172 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1173 break;
1174 }
1175 if (n != 0) {
1176 buffer = r_string(n, p);
1177 if (buffer == NULL)
1178 break;
1179 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1180 }
1181 else {
1182 v = PyUnicode_New(0, 0);
1183 }
1184 if (v == NULL)
1185 break;
1186 if (is_interned)
1187 PyUnicode_InternInPlace(&v);
1188 retval = v;
1189 R_REF(retval);
1190 break;
1191 }
1192
1193 case TYPE_SMALL_TUPLE:
1194 n = (unsigned char) r_byte(p);
1195 if (PyErr_Occurred())
1196 break;
1197 goto _read_tuple;
1198 case TYPE_TUPLE:
1199 n = r_long(p);
1200 if (PyErr_Occurred())
1201 break;
1202 if (n < 0 || n > SIZE32_MAX) {
1203 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1204 break;
1205 }
1206 _read_tuple:
1207 v = PyTuple_New(n);
1208 R_REF(v);
1209 if (v == NULL)
1210 break;
1211
1212 for (i = 0; i < n; i++) {
1213 v2 = r_object(p);
1214 if ( v2 == NULL ) {
1215 if (!PyErr_Occurred())
1216 PyErr_SetString(PyExc_TypeError,
1217 "NULL object in marshal data for tuple");
1218 Py_DECREF(v);
1219 v = NULL;
1220 break;
1221 }
1222 PyTuple_SET_ITEM(v, i, v2);
1223 }
1224 retval = v;
1225 break;
1226
1227 case TYPE_LIST:
1228 n = r_long(p);
1229 if (PyErr_Occurred())
1230 break;
1231 if (n < 0 || n > SIZE32_MAX) {
1232 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1233 break;
1234 }
1235 v = PyList_New(n);
1236 R_REF(v);
1237 if (v == NULL)
1238 break;
1239 for (i = 0; i < n; i++) {
1240 v2 = r_object(p);
1241 if ( v2 == NULL ) {
1242 if (!PyErr_Occurred())
1243 PyErr_SetString(PyExc_TypeError,
1244 "NULL object in marshal data for list");
1245 Py_DECREF(v);
1246 v = NULL;
1247 break;
1248 }
1249 PyList_SET_ITEM(v, i, v2);
1250 }
1251 retval = v;
1252 break;
1253
1254 case TYPE_DICT:
1255 v = PyDict_New();
1256 R_REF(v);
1257 if (v == NULL)
1258 break;
1259 for (;;) {
1260 PyObject *key, *val;
1261 key = r_object(p);
1262 if (key == NULL)
1263 break;
1264 val = r_object(p);
1265 if (val == NULL) {
1266 Py_DECREF(key);
1267 break;
1268 }
1269 if (PyDict_SetItem(v, key, val) < 0) {
1270 Py_DECREF(key);
1271 Py_DECREF(val);
1272 break;
1273 }
1274 Py_DECREF(key);
1275 Py_DECREF(val);
1276 }
1277 if (PyErr_Occurred()) {
1278 Py_DECREF(v);
1279 v = NULL;
1280 }
1281 retval = v;
1282 break;
1283
1284 case TYPE_SET:
1285 case TYPE_FROZENSET:
1286 n = r_long(p);
1287 if (PyErr_Occurred())
1288 break;
1289 if (n < 0 || n > SIZE32_MAX) {
1290 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1291 break;
1292 }
1293
1294 if (n == 0 && type == TYPE_FROZENSET) {
1295 /* call frozenset() to get the empty frozenset singleton */
1296 v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1297 if (v == NULL)
1298 break;
1299 R_REF(v);
1300 retval = v;
1301 }
1302 else {
1303 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1304 if (type == TYPE_SET) {
1305 R_REF(v);
1306 } else {
1307 /* must use delayed registration of frozensets because they must
1308 * be init with a refcount of 1
1309 */
1310 idx = r_ref_reserve(flag, p);
1311 if (idx < 0)
1312 Py_CLEAR(v); /* signal error */
1313 }
1314 if (v == NULL)
1315 break;
1316
1317 for (i = 0; i < n; i++) {
1318 v2 = r_object(p);
1319 if ( v2 == NULL ) {
1320 if (!PyErr_Occurred())
1321 PyErr_SetString(PyExc_TypeError,
1322 "NULL object in marshal data for set");
1323 Py_DECREF(v);
1324 v = NULL;
1325 break;
1326 }
1327 if (PySet_Add(v, v2) == -1) {
1328 Py_DECREF(v);
1329 Py_DECREF(v2);
1330 v = NULL;
1331 break;
1332 }
1333 Py_DECREF(v2);
1334 }
1335 if (type != TYPE_SET)
1336 v = r_ref_insert(v, idx, flag, p);
1337 retval = v;
1338 }
1339 break;
1340
1341 case TYPE_CODE:
1342 {
1343 int argcount;
1344 int posonlyargcount;
1345 int kwonlyargcount;
1346 int stacksize;
1347 int flags;
1348 PyObject *code = NULL;
1349 PyObject *consts = NULL;
1350 PyObject *names = NULL;
1351 PyObject *localsplusnames = NULL;
1352 PyObject *localspluskinds = NULL;
1353 PyObject *filename = NULL;
1354 PyObject *name = NULL;
1355 PyObject *qualname = NULL;
1356 int firstlineno;
1357 PyObject *linetable = NULL;
1358 PyObject* endlinetable = NULL;
1359 PyObject* columntable = NULL;
1360 PyObject *exceptiontable = NULL;
1361
1362 idx = r_ref_reserve(flag, p);
1363 if (idx < 0)
1364 break;
1365
1366 v = NULL;
1367
1368 /* XXX ignore long->int overflows for now */
1369 argcount = (int)r_long(p);
1370 if (PyErr_Occurred())
1371 goto code_error;
1372 posonlyargcount = (int)r_long(p);
1373 if (PyErr_Occurred()) {
1374 goto code_error;
1375 }
1376 kwonlyargcount = (int)r_long(p);
1377 if (PyErr_Occurred())
1378 goto code_error;
1379 stacksize = (int)r_long(p);
1380 if (PyErr_Occurred())
1381 goto code_error;
1382 flags = (int)r_long(p);
1383 if (PyErr_Occurred())
1384 goto code_error;
1385 code = r_object(p);
1386 if (code == NULL)
1387 goto code_error;
1388 consts = r_object(p);
1389 if (consts == NULL)
1390 goto code_error;
1391 names = r_object(p);
1392 if (names == NULL)
1393 goto code_error;
1394 localsplusnames = r_object(p);
1395 if (localsplusnames == NULL)
1396 goto code_error;
1397 localspluskinds = r_object(p);
1398 if (localspluskinds == NULL)
1399 goto code_error;
1400 filename = r_object(p);
1401 if (filename == NULL)
1402 goto code_error;
1403 name = r_object(p);
1404 if (name == NULL)
1405 goto code_error;
1406 qualname = r_object(p);
1407 if (qualname == NULL)
1408 goto code_error;
1409 firstlineno = (int)r_long(p);
1410 if (firstlineno == -1 && PyErr_Occurred())
1411 break;
1412 linetable = r_object(p);
1413 if (linetable == NULL)
1414 goto code_error;
1415 endlinetable = r_object(p);
1416 if (endlinetable == NULL)
1417 goto code_error;
1418 columntable = r_object(p);
1419 if (columntable == NULL)
1420 goto code_error;
1421 exceptiontable = r_object(p);
1422 if (exceptiontable == NULL)
1423 goto code_error;
1424
1425 struct _PyCodeConstructor con = {
1426 .filename = filename,
1427 .name = name,
1428 .qualname = qualname,
1429 .flags = flags,
1430
1431 .code = code,
1432 .firstlineno = firstlineno,
1433 .linetable = linetable,
1434 .endlinetable = endlinetable,
1435 .columntable = columntable,
1436
1437 .consts = consts,
1438 .names = names,
1439
1440 .localsplusnames = localsplusnames,
1441 .localspluskinds = localspluskinds,
1442
1443 .argcount = argcount,
1444 .posonlyargcount = posonlyargcount,
1445 .kwonlyargcount = kwonlyargcount,
1446
1447 .stacksize = stacksize,
1448
1449 .exceptiontable = exceptiontable,
1450 };
1451
1452 if (_PyCode_Validate(&con) < 0) {
1453 goto code_error;
1454 }
1455
1456 v = (PyObject *)_PyCode_New(&con);
1457 if (v == NULL) {
1458 goto code_error;
1459 }
1460
1461 v = r_ref_insert(v, idx, flag, p);
1462
1463 code_error:
1464 Py_XDECREF(code);
1465 Py_XDECREF(consts);
1466 Py_XDECREF(names);
1467 Py_XDECREF(localsplusnames);
1468 Py_XDECREF(localspluskinds);
1469 Py_XDECREF(filename);
1470 Py_XDECREF(name);
1471 Py_XDECREF(qualname);
1472 Py_XDECREF(linetable);
1473 Py_XDECREF(endlinetable);
1474 Py_XDECREF(columntable);
1475 Py_XDECREF(exceptiontable);
1476 }
1477 retval = v;
1478 break;
1479
1480 case TYPE_REF:
1481 n = r_long(p);
1482 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1483 if (n == -1 && PyErr_Occurred())
1484 break;
1485 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1486 break;
1487 }
1488 v = PyList_GET_ITEM(p->refs, n);
1489 if (v == Py_None) {
1490 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1491 break;
1492 }
1493 Py_INCREF(v);
1494 retval = v;
1495 break;
1496
1497 default:
1498 /* Bogus data got written, which isn't ideal.
1499 This will let you keep working and recover. */
1500 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1501 break;
1502
1503 }
1504 p->depth--;
1505 return retval;
1506 }
1507
1508 static PyObject *
read_object(RFILE * p)1509 read_object(RFILE *p)
1510 {
1511 PyObject *v;
1512 if (PyErr_Occurred()) {
1513 fprintf(stderr, "XXX readobject called with exception set\n");
1514 return NULL;
1515 }
1516 if (p->ptr && p->end) {
1517 if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1518 return NULL;
1519 }
1520 } else if (p->fp || p->readable) {
1521 if (PySys_Audit("marshal.load", NULL) < 0) {
1522 return NULL;
1523 }
1524 }
1525 v = r_object(p);
1526 if (v == NULL && !PyErr_Occurred())
1527 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1528 return v;
1529 }
1530
1531 int
PyMarshal_ReadShortFromFile(FILE * fp)1532 PyMarshal_ReadShortFromFile(FILE *fp)
1533 {
1534 RFILE rf;
1535 int res;
1536 assert(fp);
1537 rf.readable = NULL;
1538 rf.fp = fp;
1539 rf.end = rf.ptr = NULL;
1540 rf.buf = NULL;
1541 res = r_short(&rf);
1542 if (rf.buf != NULL)
1543 PyMem_Free(rf.buf);
1544 return res;
1545 }
1546
1547 long
PyMarshal_ReadLongFromFile(FILE * fp)1548 PyMarshal_ReadLongFromFile(FILE *fp)
1549 {
1550 RFILE rf;
1551 long res;
1552 rf.fp = fp;
1553 rf.readable = NULL;
1554 rf.ptr = rf.end = NULL;
1555 rf.buf = NULL;
1556 res = r_long(&rf);
1557 if (rf.buf != NULL)
1558 PyMem_Free(rf.buf);
1559 return res;
1560 }
1561
1562 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1563 static off_t
getfilesize(FILE * fp)1564 getfilesize(FILE *fp)
1565 {
1566 struct _Py_stat_struct st;
1567 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1568 return -1;
1569 #if SIZEOF_OFF_T == 4
1570 else if (st.st_size >= INT_MAX)
1571 return (off_t)INT_MAX;
1572 #endif
1573 else
1574 return (off_t)st.st_size;
1575 }
1576
1577 /* If we can get the size of the file up-front, and it's reasonably small,
1578 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1579 * than reading a byte at a time from file; speeds .pyc imports.
1580 * CAUTION: since this may read the entire remainder of the file, don't
1581 * call it unless you know you're done with the file.
1582 */
1583 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1584 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1585 {
1586 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1587 #define REASONABLE_FILE_LIMIT (1L << 18)
1588 off_t filesize;
1589 filesize = getfilesize(fp);
1590 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1591 char* pBuf = (char *)PyMem_Malloc(filesize);
1592 if (pBuf != NULL) {
1593 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1594 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1595 PyMem_Free(pBuf);
1596 return v;
1597 }
1598
1599 }
1600 /* We don't have fstat, or we do but the file is larger than
1601 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1602 */
1603 return PyMarshal_ReadObjectFromFile(fp);
1604
1605 #undef REASONABLE_FILE_LIMIT
1606 }
1607
1608 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1609 PyMarshal_ReadObjectFromFile(FILE *fp)
1610 {
1611 RFILE rf;
1612 PyObject *result;
1613 rf.fp = fp;
1614 rf.readable = NULL;
1615 rf.depth = 0;
1616 rf.ptr = rf.end = NULL;
1617 rf.buf = NULL;
1618 rf.refs = PyList_New(0);
1619 if (rf.refs == NULL)
1620 return NULL;
1621 result = read_object(&rf);
1622 Py_DECREF(rf.refs);
1623 if (rf.buf != NULL)
1624 PyMem_Free(rf.buf);
1625 return result;
1626 }
1627
1628 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1629 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1630 {
1631 RFILE rf;
1632 PyObject *result;
1633 rf.fp = NULL;
1634 rf.readable = NULL;
1635 rf.ptr = str;
1636 rf.end = str + len;
1637 rf.buf = NULL;
1638 rf.depth = 0;
1639 rf.refs = PyList_New(0);
1640 if (rf.refs == NULL)
1641 return NULL;
1642 result = read_object(&rf);
1643 Py_DECREF(rf.refs);
1644 if (rf.buf != NULL)
1645 PyMem_Free(rf.buf);
1646 return result;
1647 }
1648
1649 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1650 PyMarshal_WriteObjectToString(PyObject *x, int version)
1651 {
1652 WFILE wf;
1653
1654 if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1655 return NULL;
1656 }
1657 memset(&wf, 0, sizeof(wf));
1658 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1659 if (wf.str == NULL)
1660 return NULL;
1661 wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1662 wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1663 wf.error = WFERR_OK;
1664 wf.version = version;
1665 if (w_init_refs(&wf, version)) {
1666 Py_DECREF(wf.str);
1667 return NULL;
1668 }
1669 w_object(x, &wf);
1670 w_clear_refs(&wf);
1671 if (wf.str != NULL) {
1672 const char *base = PyBytes_AS_STRING(wf.str);
1673 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1674 return NULL;
1675 }
1676 if (wf.error != WFERR_OK) {
1677 Py_XDECREF(wf.str);
1678 if (wf.error == WFERR_NOMEMORY)
1679 PyErr_NoMemory();
1680 else
1681 PyErr_SetString(PyExc_ValueError,
1682 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1683 :"object too deeply nested to marshal");
1684 return NULL;
1685 }
1686 return wf.str;
1687 }
1688
1689 /* And an interface for Python programs... */
1690 /*[clinic input]
1691 marshal.dump
1692
1693 value: object
1694 Must be a supported type.
1695 file: object
1696 Must be a writeable binary file.
1697 version: int(c_default="Py_MARSHAL_VERSION") = version
1698 Indicates the data format that dump should use.
1699 /
1700
1701 Write the value on the open file.
1702
1703 If the value has (or contains an object that has) an unsupported type, a
1704 ValueError exception is raised - but garbage data will also be written
1705 to the file. The object will not be properly read back by load().
1706 [clinic start generated code]*/
1707
1708 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1709 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1710 int version)
1711 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1712 {
1713 /* XXX Quick hack -- need to do this differently */
1714 PyObject *s;
1715 PyObject *res;
1716 _Py_IDENTIFIER(write);
1717
1718 s = PyMarshal_WriteObjectToString(value, version);
1719 if (s == NULL)
1720 return NULL;
1721 res = _PyObject_CallMethodIdOneArg(file, &PyId_write, s);
1722 Py_DECREF(s);
1723 return res;
1724 }
1725
1726 /*[clinic input]
1727 marshal.load
1728
1729 file: object
1730 Must be readable binary file.
1731 /
1732
1733 Read one value from the open file and return it.
1734
1735 If no valid value is read (e.g. because the data has a different Python
1736 version's incompatible marshal format), raise EOFError, ValueError or
1737 TypeError.
1738
1739 Note: If an object containing an unsupported type was marshalled with
1740 dump(), load() will substitute None for the unmarshallable type.
1741 [clinic start generated code]*/
1742
1743 static PyObject *
marshal_load(PyObject * module,PyObject * file)1744 marshal_load(PyObject *module, PyObject *file)
1745 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1746 {
1747 PyObject *data, *result;
1748 _Py_IDENTIFIER(read);
1749 RFILE rf;
1750
1751 /*
1752 * Make a call to the read method, but read zero bytes.
1753 * This is to ensure that the object passed in at least
1754 * has a read method which returns bytes.
1755 * This can be removed if we guarantee good error handling
1756 * for r_string()
1757 */
1758 data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1759 if (data == NULL)
1760 return NULL;
1761 if (!PyBytes_Check(data)) {
1762 PyErr_Format(PyExc_TypeError,
1763 "file.read() returned not bytes but %.100s",
1764 Py_TYPE(data)->tp_name);
1765 result = NULL;
1766 }
1767 else {
1768 rf.depth = 0;
1769 rf.fp = NULL;
1770 rf.readable = file;
1771 rf.ptr = rf.end = NULL;
1772 rf.buf = NULL;
1773 if ((rf.refs = PyList_New(0)) != NULL) {
1774 result = read_object(&rf);
1775 Py_DECREF(rf.refs);
1776 if (rf.buf != NULL)
1777 PyMem_Free(rf.buf);
1778 } else
1779 result = NULL;
1780 }
1781 Py_DECREF(data);
1782 return result;
1783 }
1784
1785 /*[clinic input]
1786 marshal.dumps
1787
1788 value: object
1789 Must be a supported type.
1790 version: int(c_default="Py_MARSHAL_VERSION") = version
1791 Indicates the data format that dumps should use.
1792 /
1793
1794 Return the bytes object that would be written to a file by dump(value, file).
1795
1796 Raise a ValueError exception if value has (or contains an object that has) an
1797 unsupported type.
1798 [clinic start generated code]*/
1799
1800 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1801 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1802 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1803 {
1804 return PyMarshal_WriteObjectToString(value, version);
1805 }
1806
1807 /*[clinic input]
1808 marshal.loads
1809
1810 bytes: Py_buffer
1811 /
1812
1813 Convert the bytes-like object to a value.
1814
1815 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1816 bytes in the input are ignored.
1817 [clinic start generated code]*/
1818
1819 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1820 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1821 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1822 {
1823 RFILE rf;
1824 char *s = bytes->buf;
1825 Py_ssize_t n = bytes->len;
1826 PyObject* result;
1827 rf.fp = NULL;
1828 rf.readable = NULL;
1829 rf.ptr = s;
1830 rf.end = s + n;
1831 rf.depth = 0;
1832 if ((rf.refs = PyList_New(0)) == NULL)
1833 return NULL;
1834 result = read_object(&rf);
1835 Py_DECREF(rf.refs);
1836 return result;
1837 }
1838
1839 static PyMethodDef marshal_methods[] = {
1840 MARSHAL_DUMP_METHODDEF
1841 MARSHAL_LOAD_METHODDEF
1842 MARSHAL_DUMPS_METHODDEF
1843 MARSHAL_LOADS_METHODDEF
1844 {NULL, NULL} /* sentinel */
1845 };
1846
1847
/* Docstring text for the module; it is installed through the .m_doc field
 * of marshalmodule.  The text is a single string literal continued
 * across lines with backslash-newline, so whitespace at the start of a
 * continuation line becomes part of the string.
 */
PyDoc_STRVAR(module_doc,
"This module contains functions that can read and write Python values in\n\
a binary format. The format is specific to Python, but independent of\n\
machine architecture issues.\n\
\n\
Not all Python object types are supported; in general, only objects\n\
whose value is independent from a particular invocation of Python can be\n\
written and read by this module. The following types are supported:\n\
None, integers, floating point numbers, strings, bytes, bytearrays,\n\
tuples, lists, sets, dictionaries, and code objects, where it\n\
should be understood that tuples, lists and dictionaries are only\n\
supported as long as the values contained therein are themselves\n\
supported; and recursive lists and dictionaries should not be written\n\
(they will cause infinite loops).\n\
\n\
Variables:\n\
\n\
version -- indicates the format that the module uses. Version 0 is the\n\
historical format, version 1 shares interned strings and version 2\n\
uses a binary format for floating point numbers.\n\
Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\
dump() -- write value to a file\n\
load() -- read value from a file\n\
dumps() -- marshal value as a bytes object\n\
loads() -- read value from a bytes-like object");
1876
1877
1878 static int
marshal_module_exec(PyObject * mod)1879 marshal_module_exec(PyObject *mod)
1880 {
1881 if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1882 return -1;
1883 }
1884 return 0;
1885 }
1886
1887 static PyModuleDef_Slot marshalmodule_slots[] = {
1888 {Py_mod_exec, marshal_module_exec},
1889 {0, NULL}
1890 };
1891
1892 static struct PyModuleDef marshalmodule = {
1893 PyModuleDef_HEAD_INIT,
1894 .m_name = "marshal",
1895 .m_doc = module_doc,
1896 .m_methods = marshal_methods,
1897 .m_slots = marshalmodule_slots,
1898 };
1899
1900 PyMODINIT_FUNC
PyMarshal_Init(void)1901 PyMarshal_Init(void)
1902 {
1903 return PyModuleDef_Init(&marshalmodule);
1904 }
1905