1 
2 /* Write Python objects to files and read them back.
3    This is primarily intended for writing and reading compiled Python code,
4    even though dicts, lists, sets and frozensets, not commonly seen in
5    code objects, are supported.
6    Version 3 of this protocol properly supports circular links
7    and sharing. */
8 
9 #define PY_SSIZE_T_CLEAN
10 
11 #include "Python.h"
12 #include "pycore_call.h"          // _PyObject_CallNoArgs()
13 #include "pycore_code.h"          // _PyCode_New()
14 #include "pycore_floatobject.h"   // _PyFloat_Pack8()
15 #include "pycore_hashtable.h"     // _Py_hashtable_t
16 #include "code.h"
17 #include "marshal.h"              // Py_MARSHAL_VERSION
18 
19 /*[clinic input]
20 module marshal
21 [clinic start generated code]*/
22 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
23 
24 #include "clinic/marshal.c.h"
25 
/* Upper bound on the nesting depth of a marshalled object.  Once the object
 * stack gets deeper than this, an exception is raised instead of recursing
 * further, because continuing risks crashing the interpreter.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, r_object overallocates its stack and can cause a
 * stack overflow, so the limit is lowered for all Windows releases.  The
 * older, narrower condition is kept here for reference:
 *   #if defined(MS_WINDOWS) && defined(_DEBUG)
 */
#ifndef MS_WINDOWS
#define MAX_MARSHAL_STACK_DEPTH 2000
#else
#define MAX_MARSHAL_STACK_DEPTH 1000
#endif
42 
/* One-byte type codes written in front of every marshalled object. */

/* Markers and singletons. */
#define TYPE_NULL               '0'   /* NULL pointer; also terminates a dict */
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_UNKNOWN            '?'   /* object that cannot be marshalled */

/* Numbers. */
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'

/* Bytes and text. */
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_UNICODE            'u'
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Containers and code objects. */
#define TYPE_TUPLE              '('
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define TYPE_CODE               'c'

/* Object sharing. */
#define TYPE_REF                'r'   /* back-reference, followed by an index */
#define FLAG_REF                '\x80' /* with a type, add obj to index */
76 
/* Values stored in WFILE.error while writing. */
#define WFERR_OK             0   /* no error recorded */
#define WFERR_UNMARSHALLABLE 1   /* object cannot be marshalled */
#define WFERR_NESTEDTOODEEP  2   /* MAX_MARSHAL_STACK_DEPTH exceeded */
#define WFERR_NOMEMORY       3   /* an allocation failed */
81 
82 typedef struct {
83     FILE *fp;
84     int error;  /* see WFERR_* values */
85     int depth;
86     PyObject *str;
87     char *ptr;
88     const char *end;
89     char *buf;
90     _Py_hashtable_t *hashtable;
91     int version;
92 } WFILE;
93 
/* Append the single byte 'c' to writer 'p'.  When the buffer is full,
   w_reserve() is asked for one more byte and the write is dropped if that
   fails.  Note that 'p' is evaluated more than once. */
#define w_byte(c, p) do {                                   \
        if ((p)->ptr != (p)->end || w_reserve((p), 1)) {    \
            *(p)->ptr++ = (c);                              \
        }                                                   \
    } while(0)
98 
99 static void
w_flush(WFILE * p)100 w_flush(WFILE *p)
101 {
102     assert(p->fp != NULL);
103     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
104     p->ptr = p->buf;
105 }
106 
107 static int
w_reserve(WFILE * p,Py_ssize_t needed)108 w_reserve(WFILE *p, Py_ssize_t needed)
109 {
110     Py_ssize_t pos, size, delta;
111     if (p->ptr == NULL)
112         return 0; /* An error already occurred */
113     if (p->fp != NULL) {
114         w_flush(p);
115         return needed <= p->end - p->ptr;
116     }
117     assert(p->str != NULL);
118     pos = p->ptr - p->buf;
119     size = PyBytes_GET_SIZE(p->str);
120     if (size > 16*1024*1024)
121         delta = (size >> 3);            /* 12.5% overallocation */
122     else
123         delta = size + 1024;
124     delta = Py_MAX(delta, needed);
125     if (delta > PY_SSIZE_T_MAX - size) {
126         p->error = WFERR_NOMEMORY;
127         return 0;
128     }
129     size += delta;
130     if (_PyBytes_Resize(&p->str, size) != 0) {
131         p->end = p->ptr = p->buf = NULL;
132         return 0;
133     }
134     else {
135         p->buf = PyBytes_AS_STRING(p->str);
136         p->ptr = p->buf + pos;
137         p->end = p->buf + size;
138         return 1;
139     }
140 }
141 
142 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)143 w_string(const void *s, Py_ssize_t n, WFILE *p)
144 {
145     Py_ssize_t m;
146     if (!n || p->ptr == NULL)
147         return;
148     m = p->end - p->ptr;
149     if (p->fp != NULL) {
150         if (n <= m) {
151             memcpy(p->ptr, s, n);
152             p->ptr += n;
153         }
154         else {
155             w_flush(p);
156             fwrite(s, 1, n, p->fp);
157         }
158     }
159     else {
160         if (n <= m || w_reserve(p, n - m)) {
161             memcpy(p->ptr, s, n);
162             p->ptr += n;
163         }
164     }
165 }
166 
167 static void
w_short(int x,WFILE * p)168 w_short(int x, WFILE *p)
169 {
170     w_byte((char)( x      & 0xff), p);
171     w_byte((char)((x>> 8) & 0xff), p);
172 }
173 
174 static void
w_long(long x,WFILE * p)175 w_long(long x, WFILE *p)
176 {
177     w_byte((char)( x      & 0xff), p);
178     w_byte((char)((x>> 8) & 0xff), p);
179     w_byte((char)((x>>16) & 0xff), p);
180     w_byte((char)((x>>24) & 0xff), p);
181 }
182 
/* Largest size or count that can be written as a 32-bit field. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p) writes the size 'n' as a 32-bit field.  Where size_t is wider
   than 4 bytes, a value above SIZE32_MAX is rejected instead: the writer is
   marked WFERR_UNMARSHALLABLE, its depth is decremented and the macro
   *returns from the enclosing function*, which must therefore return void. */
#if SIZEOF_SIZE_T <= 4
# define W_SIZE  w_long
#else
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#endif
197 
198 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)199 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
200 {
201         W_SIZE(n, p);
202         w_string(s, n, p);
203 }
204 
205 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)206 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
207 {
208     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
209     w_string(s, n, p);
210 }
211 
/* Python ints are assumed to be stored internally in a base that is some
   power of 2**15.  For portability the marshal format always uses base
   exactly 2**15, so each internal digit maps onto PyLong_MARSHAL_RATIO
   marshal digits. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if (PyLong_SHIFT % PyLong_MARSHAL_SHIFT) != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
223 
/* Write type code 't' combined with the caller's reference flag.  This
   macro expects a variable named 'flag' to be in scope at the point of use. */
#define W_TYPE(t, p) do {           \
        w_byte((t) | flag, (p));    \
    } while(0)
227 
228 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)229 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
230 {
231     Py_ssize_t i, j, n, l;
232     digit d;
233 
234     W_TYPE(TYPE_LONG, p);
235     if (Py_SIZE(ob) == 0) {
236         w_long((long)0, p);
237         return;
238     }
239 
240     /* set l to number of base PyLong_MARSHAL_BASE digits */
241     n = Py_ABS(Py_SIZE(ob));
242     l = (n-1) * PyLong_MARSHAL_RATIO;
243     d = ob->ob_digit[n-1];
244     assert(d != 0); /* a PyLong is always normalized */
245     do {
246         d >>= PyLong_MARSHAL_SHIFT;
247         l++;
248     } while (d != 0);
249     if (l > SIZE32_MAX) {
250         p->depth--;
251         p->error = WFERR_UNMARSHALLABLE;
252         return;
253     }
254     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
255 
256     for (i=0; i < n-1; i++) {
257         d = ob->ob_digit[i];
258         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
259             w_short(d & PyLong_MARSHAL_MASK, p);
260             d >>= PyLong_MARSHAL_SHIFT;
261         }
262         assert (d == 0);
263     }
264     d = ob->ob_digit[n-1];
265     do {
266         w_short(d & PyLong_MARSHAL_MASK, p);
267         d >>= PyLong_MARSHAL_SHIFT;
268     } while (d != 0);
269 }
270 
271 static void
w_float_bin(double v,WFILE * p)272 w_float_bin(double v, WFILE *p)
273 {
274     unsigned char buf[8];
275     if (_PyFloat_Pack8(v, buf, 1) < 0) {
276         p->error = WFERR_UNMARSHALLABLE;
277         return;
278     }
279     w_string(buf, 8, p);
280 }
281 
282 static void
w_float_str(double v,WFILE * p)283 w_float_str(double v, WFILE *p)
284 {
285     char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
286     if (!buf) {
287         p->error = WFERR_NOMEMORY;
288         return;
289     }
290     w_short_pstring(buf, strlen(buf), p);
291     PyMem_Free(buf);
292 }
293 
294 static int
w_ref(PyObject * v,char * flag,WFILE * p)295 w_ref(PyObject *v, char *flag, WFILE *p)
296 {
297     _Py_hashtable_entry_t *entry;
298     int w;
299 
300     if (p->version < 3 || p->hashtable == NULL)
301         return 0; /* not writing object references */
302 
303     /* if it has only one reference, it definitely isn't shared */
304     if (Py_REFCNT(v) == 1)
305         return 0;
306 
307     entry = _Py_hashtable_get_entry(p->hashtable, v);
308     if (entry != NULL) {
309         /* write the reference index to the stream */
310         w = (int)(uintptr_t)entry->value;
311         /* we don't store "long" indices in the dict */
312         assert(0 <= w && w <= 0x7fffffff);
313         w_byte(TYPE_REF, p);
314         w_long(w, p);
315         return 1;
316     } else {
317         size_t s = p->hashtable->nentries;
318         /* we don't support long indices */
319         if (s >= 0x7fffffff) {
320             PyErr_SetString(PyExc_ValueError, "too many objects");
321             goto err;
322         }
323         w = (int)s;
324         Py_INCREF(v);
325         if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
326             Py_DECREF(v);
327             goto err;
328         }
329         *flag |= FLAG_REF;
330         return 0;
331     }
332 err:
333     p->error = WFERR_UNMARSHALLABLE;
334     return 1;
335 }
336 
337 static void
338 w_complex_object(PyObject *v, char flag, WFILE *p);
339 
340 static void
w_object(PyObject * v,WFILE * p)341 w_object(PyObject *v, WFILE *p)
342 {
343     char flag = '\0';
344 
345     p->depth++;
346 
347     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
348         p->error = WFERR_NESTEDTOODEEP;
349     }
350     else if (v == NULL) {
351         w_byte(TYPE_NULL, p);
352     }
353     else if (v == Py_None) {
354         w_byte(TYPE_NONE, p);
355     }
356     else if (v == PyExc_StopIteration) {
357         w_byte(TYPE_STOPITER, p);
358     }
359     else if (v == Py_Ellipsis) {
360         w_byte(TYPE_ELLIPSIS, p);
361     }
362     else if (v == Py_False) {
363         w_byte(TYPE_FALSE, p);
364     }
365     else if (v == Py_True) {
366         w_byte(TYPE_TRUE, p);
367     }
368     else if (!w_ref(v, &flag, p))
369         w_complex_object(v, flag, p);
370 
371     p->depth--;
372 }
373 
374 static void
w_complex_object(PyObject * v,char flag,WFILE * p)375 w_complex_object(PyObject *v, char flag, WFILE *p)
376 {
377     Py_ssize_t i, n;
378 
379     if (PyLong_CheckExact(v)) {
380         int overflow;
381         long x = PyLong_AsLongAndOverflow(v, &overflow);
382         if (overflow) {
383             w_PyLong((PyLongObject *)v, flag, p);
384         }
385         else {
386 #if SIZEOF_LONG > 4
387             long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
388             if (y && y != -1) {
389                 /* Too large for TYPE_INT */
390                 w_PyLong((PyLongObject*)v, flag, p);
391             }
392             else
393 #endif
394             {
395                 W_TYPE(TYPE_INT, p);
396                 w_long(x, p);
397             }
398         }
399     }
400     else if (PyFloat_CheckExact(v)) {
401         if (p->version > 1) {
402             W_TYPE(TYPE_BINARY_FLOAT, p);
403             w_float_bin(PyFloat_AS_DOUBLE(v), p);
404         }
405         else {
406             W_TYPE(TYPE_FLOAT, p);
407             w_float_str(PyFloat_AS_DOUBLE(v), p);
408         }
409     }
410     else if (PyComplex_CheckExact(v)) {
411         if (p->version > 1) {
412             W_TYPE(TYPE_BINARY_COMPLEX, p);
413             w_float_bin(PyComplex_RealAsDouble(v), p);
414             w_float_bin(PyComplex_ImagAsDouble(v), p);
415         }
416         else {
417             W_TYPE(TYPE_COMPLEX, p);
418             w_float_str(PyComplex_RealAsDouble(v), p);
419             w_float_str(PyComplex_ImagAsDouble(v), p);
420         }
421     }
422     else if (PyBytes_CheckExact(v)) {
423         W_TYPE(TYPE_STRING, p);
424         w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
425     }
426     else if (PyUnicode_CheckExact(v)) {
427         if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
428             int is_short = PyUnicode_GET_LENGTH(v) < 256;
429             if (is_short) {
430                 if (PyUnicode_CHECK_INTERNED(v))
431                     W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
432                 else
433                     W_TYPE(TYPE_SHORT_ASCII, p);
434                 w_short_pstring(PyUnicode_1BYTE_DATA(v),
435                                 PyUnicode_GET_LENGTH(v), p);
436             }
437             else {
438                 if (PyUnicode_CHECK_INTERNED(v))
439                     W_TYPE(TYPE_ASCII_INTERNED, p);
440                 else
441                     W_TYPE(TYPE_ASCII, p);
442                 w_pstring(PyUnicode_1BYTE_DATA(v),
443                           PyUnicode_GET_LENGTH(v), p);
444             }
445         }
446         else {
447             PyObject *utf8;
448             utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
449             if (utf8 == NULL) {
450                 p->depth--;
451                 p->error = WFERR_UNMARSHALLABLE;
452                 return;
453             }
454             if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
455                 W_TYPE(TYPE_INTERNED, p);
456             else
457                 W_TYPE(TYPE_UNICODE, p);
458             w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
459             Py_DECREF(utf8);
460         }
461     }
462     else if (PyTuple_CheckExact(v)) {
463         n = PyTuple_GET_SIZE(v);
464         if (p->version >= 4 && n < 256) {
465             W_TYPE(TYPE_SMALL_TUPLE, p);
466             w_byte((unsigned char)n, p);
467         }
468         else {
469             W_TYPE(TYPE_TUPLE, p);
470             W_SIZE(n, p);
471         }
472         for (i = 0; i < n; i++) {
473             w_object(PyTuple_GET_ITEM(v, i), p);
474         }
475     }
476     else if (PyList_CheckExact(v)) {
477         W_TYPE(TYPE_LIST, p);
478         n = PyList_GET_SIZE(v);
479         W_SIZE(n, p);
480         for (i = 0; i < n; i++) {
481             w_object(PyList_GET_ITEM(v, i), p);
482         }
483     }
484     else if (PyDict_CheckExact(v)) {
485         Py_ssize_t pos;
486         PyObject *key, *value;
487         W_TYPE(TYPE_DICT, p);
488         /* This one is NULL object terminated! */
489         pos = 0;
490         while (PyDict_Next(v, &pos, &key, &value)) {
491             w_object(key, p);
492             w_object(value, p);
493         }
494         w_object((PyObject *)NULL, p);
495     }
496     else if (PyAnySet_CheckExact(v)) {
497         PyObject *value;
498         Py_ssize_t pos = 0;
499         Py_hash_t hash;
500 
501         if (PyFrozenSet_CheckExact(v))
502             W_TYPE(TYPE_FROZENSET, p);
503         else
504             W_TYPE(TYPE_SET, p);
505         n = PySet_GET_SIZE(v);
506         W_SIZE(n, p);
507         // bpo-37596: To support reproducible builds, sets and frozensets need
508         // to have their elements serialized in a consistent order (even when
509         // they have been scrambled by hash randomization). To ensure this, we
510         // use an order equivalent to sorted(v, key=marshal.dumps):
511         PyObject *pairs = PyList_New(n);
512         if (pairs == NULL) {
513             p->error = WFERR_NOMEMORY;
514             return;
515         }
516         Py_ssize_t i = 0;
517         while (_PySet_NextEntry(v, &pos, &value, &hash)) {
518             PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
519             if (dump == NULL) {
520                 p->error = WFERR_UNMARSHALLABLE;
521                 Py_DECREF(pairs);
522                 return;
523             }
524             PyObject *pair = PyTuple_Pack(2, dump, value);
525             Py_DECREF(dump);
526             if (pair == NULL) {
527                 p->error = WFERR_NOMEMORY;
528                 Py_DECREF(pairs);
529                 return;
530             }
531             PyList_SET_ITEM(pairs, i++, pair);
532         }
533         assert(i == n);
534         if (PyList_Sort(pairs)) {
535             p->error = WFERR_NOMEMORY;
536             Py_DECREF(pairs);
537             return;
538         }
539         for (Py_ssize_t i = 0; i < n; i++) {
540             PyObject *pair = PyList_GET_ITEM(pairs, i);
541             value = PyTuple_GET_ITEM(pair, 1);
542             w_object(value, p);
543         }
544         Py_DECREF(pairs);
545     }
546     else if (PyCode_Check(v)) {
547         PyCodeObject *co = (PyCodeObject *)v;
548         W_TYPE(TYPE_CODE, p);
549         w_long(co->co_argcount, p);
550         w_long(co->co_posonlyargcount, p);
551         w_long(co->co_kwonlyargcount, p);
552         w_long(co->co_stacksize, p);
553         w_long(co->co_flags, p);
554         w_object(co->co_code, p);
555         w_object(co->co_consts, p);
556         w_object(co->co_names, p);
557         w_object(co->co_localsplusnames, p);
558         w_object(co->co_localspluskinds, p);
559         w_object(co->co_filename, p);
560         w_object(co->co_name, p);
561         w_object(co->co_qualname, p);
562         w_long(co->co_firstlineno, p);
563         w_object(co->co_linetable, p);
564         w_object(co->co_endlinetable, p);
565         w_object(co->co_columntable, p);
566         w_object(co->co_exceptiontable, p);
567     }
568     else if (PyObject_CheckBuffer(v)) {
569         /* Write unknown bytes-like objects as a bytes object */
570         Py_buffer view;
571         if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
572             w_byte(TYPE_UNKNOWN, p);
573             p->depth--;
574             p->error = WFERR_UNMARSHALLABLE;
575             return;
576         }
577         W_TYPE(TYPE_STRING, p);
578         w_pstring(view.buf, view.len, p);
579         PyBuffer_Release(&view);
580     }
581     else {
582         W_TYPE(TYPE_UNKNOWN, p);
583         p->error = WFERR_UNMARSHALLABLE;
584     }
585 }
586 
587 static void
w_decref_entry(void * key)588 w_decref_entry(void *key)
589 {
590     PyObject *entry_key = (PyObject *)key;
591     Py_XDECREF(entry_key);
592 }
593 
594 static int
w_init_refs(WFILE * wf,int version)595 w_init_refs(WFILE *wf, int version)
596 {
597     if (version >= 3) {
598         wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
599                                                _Py_hashtable_compare_direct,
600                                                w_decref_entry, NULL, NULL);
601         if (wf->hashtable == NULL) {
602             PyErr_NoMemory();
603             return -1;
604         }
605     }
606     return 0;
607 }
608 
609 static void
w_clear_refs(WFILE * wf)610 w_clear_refs(WFILE *wf)
611 {
612     if (wf->hashtable != NULL) {
613         _Py_hashtable_destroy(wf->hashtable);
614     }
615 }
616 
617 /* version currently has no effect for writing ints. */
618 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)619 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
620 {
621     char buf[4];
622     WFILE wf;
623     memset(&wf, 0, sizeof(wf));
624     wf.fp = fp;
625     wf.ptr = wf.buf = buf;
626     wf.end = wf.ptr + sizeof(buf);
627     wf.error = WFERR_OK;
628     wf.version = version;
629     w_long(x, &wf);
630     w_flush(&wf);
631 }
632 
633 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)634 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
635 {
636     char buf[BUFSIZ];
637     WFILE wf;
638     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
639         return; /* caller must check PyErr_Occurred() */
640     }
641     memset(&wf, 0, sizeof(wf));
642     wf.fp = fp;
643     wf.ptr = wf.buf = buf;
644     wf.end = wf.ptr + sizeof(buf);
645     wf.error = WFERR_OK;
646     wf.version = version;
647     if (w_init_refs(&wf, version)) {
648         return; /* caller must check PyErr_Occurred() */
649     }
650     w_object(x, &wf);
651     w_clear_refs(&wf);
652     w_flush(&wf);
653 }
654 
655 typedef struct {
656     FILE *fp;
657     int depth;
658     PyObject *readable;  /* Stream-like object being read from */
659     const char *ptr;
660     const char *end;
661     char *buf;
662     Py_ssize_t buf_size;
663     PyObject *refs;  /* a list */
664 } RFILE;
665 
666 static const char *
r_string(Py_ssize_t n,RFILE * p)667 r_string(Py_ssize_t n, RFILE *p)
668 {
669     Py_ssize_t read = -1;
670 
671     if (p->ptr != NULL) {
672         /* Fast path for loads() */
673         const char *res = p->ptr;
674         Py_ssize_t left = p->end - p->ptr;
675         if (left < n) {
676             PyErr_SetString(PyExc_EOFError,
677                             "marshal data too short");
678             return NULL;
679         }
680         p->ptr += n;
681         return res;
682     }
683     if (p->buf == NULL) {
684         p->buf = PyMem_Malloc(n);
685         if (p->buf == NULL) {
686             PyErr_NoMemory();
687             return NULL;
688         }
689         p->buf_size = n;
690     }
691     else if (p->buf_size < n) {
692         char *tmp = PyMem_Realloc(p->buf, n);
693         if (tmp == NULL) {
694             PyErr_NoMemory();
695             return NULL;
696         }
697         p->buf = tmp;
698         p->buf_size = n;
699     }
700 
701     if (!p->readable) {
702         assert(p->fp != NULL);
703         read = fread(p->buf, 1, n, p->fp);
704     }
705     else {
706         _Py_IDENTIFIER(readinto);
707         PyObject *res, *mview;
708         Py_buffer buf;
709 
710         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
711             return NULL;
712         mview = PyMemoryView_FromBuffer(&buf);
713         if (mview == NULL)
714             return NULL;
715 
716         res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
717         if (res != NULL) {
718             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
719             Py_DECREF(res);
720         }
721     }
722     if (read != n) {
723         if (!PyErr_Occurred()) {
724             if (read > n)
725                 PyErr_Format(PyExc_ValueError,
726                              "read() returned too much data: "
727                              "%zd bytes requested, %zd returned",
728                              n, read);
729             else
730                 PyErr_SetString(PyExc_EOFError,
731                                 "EOF read where not expected");
732         }
733         return NULL;
734     }
735     return p->buf;
736 }
737 
738 static int
r_byte(RFILE * p)739 r_byte(RFILE *p)
740 {
741     int c = EOF;
742 
743     if (p->ptr != NULL) {
744         if (p->ptr < p->end)
745             c = (unsigned char) *p->ptr++;
746         return c;
747     }
748     if (!p->readable) {
749         assert(p->fp);
750         c = getc(p->fp);
751     }
752     else {
753         const char *ptr = r_string(1, p);
754         if (ptr != NULL)
755             c = *(const unsigned char *) ptr;
756     }
757     return c;
758 }
759 
760 static int
r_short(RFILE * p)761 r_short(RFILE *p)
762 {
763     short x = -1;
764     const unsigned char *buffer;
765 
766     buffer = (const unsigned char *) r_string(2, p);
767     if (buffer != NULL) {
768         x = buffer[0];
769         x |= buffer[1] << 8;
770         /* Sign-extension, in case short greater than 16 bits */
771         x |= -(x & 0x8000);
772     }
773     return x;
774 }
775 
776 static long
r_long(RFILE * p)777 r_long(RFILE *p)
778 {
779     long x = -1;
780     const unsigned char *buffer;
781 
782     buffer = (const unsigned char *) r_string(4, p);
783     if (buffer != NULL) {
784         x = buffer[0];
785         x |= (long)buffer[1] << 8;
786         x |= (long)buffer[2] << 16;
787         x |= (long)buffer[3] << 24;
788 #if SIZEOF_LONG > 4
789         /* Sign extension for 64-bit machines */
790         x |= -(x & 0x80000000L);
791 #endif
792     }
793     return x;
794 }
795 
796 /* r_long64 deals with the TYPE_INT64 code. */
797 static PyObject *
r_long64(RFILE * p)798 r_long64(RFILE *p)
799 {
800     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
801     if (buffer == NULL) {
802         return NULL;
803     }
804     return _PyLong_FromByteArray(buffer, 8,
805                                  1 /* little endian */,
806                                  1 /* signed */);
807 }
808 
809 static PyObject *
r_PyLong(RFILE * p)810 r_PyLong(RFILE *p)
811 {
812     PyLongObject *ob;
813     long n, size, i;
814     int j, md, shorts_in_top_digit;
815     digit d;
816 
817     n = r_long(p);
818     if (PyErr_Occurred())
819         return NULL;
820     if (n == 0)
821         return (PyObject *)_PyLong_New(0);
822     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
823         PyErr_SetString(PyExc_ValueError,
824                        "bad marshal data (long size out of range)");
825         return NULL;
826     }
827 
828     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
829     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
830     ob = _PyLong_New(size);
831     if (ob == NULL)
832         return NULL;
833 
834     Py_SET_SIZE(ob, n > 0 ? size : -size);
835 
836     for (i = 0; i < size-1; i++) {
837         d = 0;
838         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
839             md = r_short(p);
840             if (PyErr_Occurred()) {
841                 Py_DECREF(ob);
842                 return NULL;
843             }
844             if (md < 0 || md > PyLong_MARSHAL_BASE)
845                 goto bad_digit;
846             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
847         }
848         ob->ob_digit[i] = d;
849     }
850 
851     d = 0;
852     for (j=0; j < shorts_in_top_digit; j++) {
853         md = r_short(p);
854         if (PyErr_Occurred()) {
855             Py_DECREF(ob);
856             return NULL;
857         }
858         if (md < 0 || md > PyLong_MARSHAL_BASE)
859             goto bad_digit;
860         /* topmost marshal digit should be nonzero */
861         if (md == 0 && j == shorts_in_top_digit - 1) {
862             Py_DECREF(ob);
863             PyErr_SetString(PyExc_ValueError,
864                 "bad marshal data (unnormalized long data)");
865             return NULL;
866         }
867         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
868     }
869     if (PyErr_Occurred()) {
870         Py_DECREF(ob);
871         return NULL;
872     }
873     /* top digit should be nonzero, else the resulting PyLong won't be
874        normalized */
875     ob->ob_digit[size-1] = d;
876     return (PyObject *)ob;
877   bad_digit:
878     Py_DECREF(ob);
879     PyErr_SetString(PyExc_ValueError,
880                     "bad marshal data (digit out of range in long)");
881     return NULL;
882 }
883 
884 static double
r_float_bin(RFILE * p)885 r_float_bin(RFILE *p)
886 {
887     const unsigned char *buf = (const unsigned char *) r_string(8, p);
888     if (buf == NULL)
889         return -1;
890     return _PyFloat_Unpack8(buf, 1);
891 }
892 
893 /* Issue #33720: Disable inlining for reducing the C stack consumption
894    on PGO builds. */
895 Py_NO_INLINE static double
r_float_str(RFILE * p)896 r_float_str(RFILE *p)
897 {
898     int n;
899     char buf[256];
900     const char *ptr;
901     n = r_byte(p);
902     if (n == EOF) {
903         PyErr_SetString(PyExc_EOFError,
904             "EOF read where object expected");
905         return -1;
906     }
907     ptr = r_string(n, p);
908     if (ptr == NULL) {
909         return -1;
910     }
911     memcpy(buf, ptr, n);
912     buf[n] = '\0';
913     return PyOS_string_to_double(buf, NULL, NULL);
914 }
915 
916 /* allocate the reflist index for a new object. Return -1 on failure */
917 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)918 r_ref_reserve(int flag, RFILE *p)
919 {
920     if (flag) { /* currently only FLAG_REF is defined */
921         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
922         if (idx >= 0x7ffffffe) {
923             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
924             return -1;
925         }
926         if (PyList_Append(p->refs, Py_None) < 0)
927             return -1;
928         return idx;
929     } else
930         return 0;
931 }
932 
933 /* insert the new object 'o' to the reflist at previously
934  * allocated index 'idx'.
935  * 'o' can be NULL, in which case nothing is done.
936  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
937  * if 'o' was non-NULL, and the function fails, 'o' is released and
938  * NULL returned. This simplifies error checking at the call site since
939  * a single test for NULL for the function result is enough.
940  */
941 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)942 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
943 {
944     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
945         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
946         Py_INCREF(o);
947         PyList_SET_ITEM(p->refs, idx, o);
948         Py_DECREF(tmp);
949     }
950     return o;
951 }
952 
953 /* combination of both above, used when an object can be
954  * created whenever it is seen in the file, as opposed to
955  * after having loaded its sub-objects.
956  */
957 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)958 r_ref(PyObject *o, int flag, RFILE *p)
959 {
960     assert(flag & FLAG_REF);
961     if (o == NULL)
962         return NULL;
963     if (PyList_Append(p->refs, o) < 0) {
964         Py_DECREF(o); /* release the new object */
965         return NULL;
966     }
967     return o;
968 }
969 
970 static PyObject *
r_object(RFILE * p)971 r_object(RFILE *p)
972 {
973     /* NULL is a valid return value, it does not necessarily means that
974        an exception is set. */
975     PyObject *v, *v2;
976     Py_ssize_t idx = 0;
977     long i, n;
978     int type, code = r_byte(p);
979     int flag, is_interned = 0;
980     PyObject *retval = NULL;
981 
982     if (code == EOF) {
983         PyErr_SetString(PyExc_EOFError,
984                         "EOF read where object expected");
985         return NULL;
986     }
987 
988     p->depth++;
989 
990     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
991         p->depth--;
992         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
993         return NULL;
994     }
995 
996     flag = code & FLAG_REF;
997     type = code & ~FLAG_REF;
998 
999 #define R_REF(O) do{\
1000     if (flag) \
1001         O = r_ref(O, flag, p);\
1002 } while (0)
1003 
1004     switch (type) {
1005 
1006     case TYPE_NULL:
1007         break;
1008 
1009     case TYPE_NONE:
1010         Py_INCREF(Py_None);
1011         retval = Py_None;
1012         break;
1013 
1014     case TYPE_STOPITER:
1015         Py_INCREF(PyExc_StopIteration);
1016         retval = PyExc_StopIteration;
1017         break;
1018 
1019     case TYPE_ELLIPSIS:
1020         Py_INCREF(Py_Ellipsis);
1021         retval = Py_Ellipsis;
1022         break;
1023 
1024     case TYPE_FALSE:
1025         Py_INCREF(Py_False);
1026         retval = Py_False;
1027         break;
1028 
1029     case TYPE_TRUE:
1030         Py_INCREF(Py_True);
1031         retval = Py_True;
1032         break;
1033 
1034     case TYPE_INT:
1035         n = r_long(p);
1036         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1037         R_REF(retval);
1038         break;
1039 
1040     case TYPE_INT64:
1041         retval = r_long64(p);
1042         R_REF(retval);
1043         break;
1044 
1045     case TYPE_LONG:
1046         retval = r_PyLong(p);
1047         R_REF(retval);
1048         break;
1049 
1050     case TYPE_FLOAT:
1051         {
1052             double x = r_float_str(p);
1053             if (x == -1.0 && PyErr_Occurred())
1054                 break;
1055             retval = PyFloat_FromDouble(x);
1056             R_REF(retval);
1057             break;
1058         }
1059 
1060     case TYPE_BINARY_FLOAT:
1061         {
1062             double x = r_float_bin(p);
1063             if (x == -1.0 && PyErr_Occurred())
1064                 break;
1065             retval = PyFloat_FromDouble(x);
1066             R_REF(retval);
1067             break;
1068         }
1069 
1070     case TYPE_COMPLEX:
1071         {
1072             Py_complex c;
1073             c.real = r_float_str(p);
1074             if (c.real == -1.0 && PyErr_Occurred())
1075                 break;
1076             c.imag = r_float_str(p);
1077             if (c.imag == -1.0 && PyErr_Occurred())
1078                 break;
1079             retval = PyComplex_FromCComplex(c);
1080             R_REF(retval);
1081             break;
1082         }
1083 
1084     case TYPE_BINARY_COMPLEX:
1085         {
1086             Py_complex c;
1087             c.real = r_float_bin(p);
1088             if (c.real == -1.0 && PyErr_Occurred())
1089                 break;
1090             c.imag = r_float_bin(p);
1091             if (c.imag == -1.0 && PyErr_Occurred())
1092                 break;
1093             retval = PyComplex_FromCComplex(c);
1094             R_REF(retval);
1095             break;
1096         }
1097 
1098     case TYPE_STRING:
1099         {
1100             const char *ptr;
1101             n = r_long(p);
1102             if (PyErr_Occurred())
1103                 break;
1104             if (n < 0 || n > SIZE32_MAX) {
1105                 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1106                 break;
1107             }
1108             v = PyBytes_FromStringAndSize((char *)NULL, n);
1109             if (v == NULL)
1110                 break;
1111             ptr = r_string(n, p);
1112             if (ptr == NULL) {
1113                 Py_DECREF(v);
1114                 break;
1115             }
1116             memcpy(PyBytes_AS_STRING(v), ptr, n);
1117             retval = v;
1118             R_REF(retval);
1119             break;
1120         }
1121 
1122     case TYPE_ASCII_INTERNED:
1123         is_interned = 1;
1124         /* fall through */
1125     case TYPE_ASCII:
1126         n = r_long(p);
1127         if (PyErr_Occurred())
1128             break;
1129         if (n < 0 || n > SIZE32_MAX) {
1130             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1131             break;
1132         }
1133         goto _read_ascii;
1134 
1135     case TYPE_SHORT_ASCII_INTERNED:
1136         is_interned = 1;
1137         /* fall through */
1138     case TYPE_SHORT_ASCII:
1139         n = r_byte(p);
1140         if (n == EOF) {
1141             PyErr_SetString(PyExc_EOFError,
1142                 "EOF read where object expected");
1143             break;
1144         }
1145     _read_ascii:
1146         {
1147             const char *ptr;
1148             ptr = r_string(n, p);
1149             if (ptr == NULL)
1150                 break;
1151             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1152             if (v == NULL)
1153                 break;
1154             if (is_interned)
1155                 PyUnicode_InternInPlace(&v);
1156             retval = v;
1157             R_REF(retval);
1158             break;
1159         }
1160 
1161     case TYPE_INTERNED:
1162         is_interned = 1;
1163         /* fall through */
1164     case TYPE_UNICODE:
1165         {
1166         const char *buffer;
1167 
1168         n = r_long(p);
1169         if (PyErr_Occurred())
1170             break;
1171         if (n < 0 || n > SIZE32_MAX) {
1172             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1173             break;
1174         }
1175         if (n != 0) {
1176             buffer = r_string(n, p);
1177             if (buffer == NULL)
1178                 break;
1179             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1180         }
1181         else {
1182             v = PyUnicode_New(0, 0);
1183         }
1184         if (v == NULL)
1185             break;
1186         if (is_interned)
1187             PyUnicode_InternInPlace(&v);
1188         retval = v;
1189         R_REF(retval);
1190         break;
1191         }
1192 
1193     case TYPE_SMALL_TUPLE:
1194         n = (unsigned char) r_byte(p);
1195         if (PyErr_Occurred())
1196             break;
1197         goto _read_tuple;
1198     case TYPE_TUPLE:
1199         n = r_long(p);
1200         if (PyErr_Occurred())
1201             break;
1202         if (n < 0 || n > SIZE32_MAX) {
1203             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1204             break;
1205         }
1206     _read_tuple:
1207         v = PyTuple_New(n);
1208         R_REF(v);
1209         if (v == NULL)
1210             break;
1211 
1212         for (i = 0; i < n; i++) {
1213             v2 = r_object(p);
1214             if ( v2 == NULL ) {
1215                 if (!PyErr_Occurred())
1216                     PyErr_SetString(PyExc_TypeError,
1217                         "NULL object in marshal data for tuple");
1218                 Py_DECREF(v);
1219                 v = NULL;
1220                 break;
1221             }
1222             PyTuple_SET_ITEM(v, i, v2);
1223         }
1224         retval = v;
1225         break;
1226 
1227     case TYPE_LIST:
1228         n = r_long(p);
1229         if (PyErr_Occurred())
1230             break;
1231         if (n < 0 || n > SIZE32_MAX) {
1232             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1233             break;
1234         }
1235         v = PyList_New(n);
1236         R_REF(v);
1237         if (v == NULL)
1238             break;
1239         for (i = 0; i < n; i++) {
1240             v2 = r_object(p);
1241             if ( v2 == NULL ) {
1242                 if (!PyErr_Occurred())
1243                     PyErr_SetString(PyExc_TypeError,
1244                         "NULL object in marshal data for list");
1245                 Py_DECREF(v);
1246                 v = NULL;
1247                 break;
1248             }
1249             PyList_SET_ITEM(v, i, v2);
1250         }
1251         retval = v;
1252         break;
1253 
1254     case TYPE_DICT:
1255         v = PyDict_New();
1256         R_REF(v);
1257         if (v == NULL)
1258             break;
1259         for (;;) {
1260             PyObject *key, *val;
1261             key = r_object(p);
1262             if (key == NULL)
1263                 break;
1264             val = r_object(p);
1265             if (val == NULL) {
1266                 Py_DECREF(key);
1267                 break;
1268             }
1269             if (PyDict_SetItem(v, key, val) < 0) {
1270                 Py_DECREF(key);
1271                 Py_DECREF(val);
1272                 break;
1273             }
1274             Py_DECREF(key);
1275             Py_DECREF(val);
1276         }
1277         if (PyErr_Occurred()) {
1278             Py_DECREF(v);
1279             v = NULL;
1280         }
1281         retval = v;
1282         break;
1283 
1284     case TYPE_SET:
1285     case TYPE_FROZENSET:
1286         n = r_long(p);
1287         if (PyErr_Occurred())
1288             break;
1289         if (n < 0 || n > SIZE32_MAX) {
1290             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1291             break;
1292         }
1293 
1294         if (n == 0 && type == TYPE_FROZENSET) {
1295             /* call frozenset() to get the empty frozenset singleton */
1296             v = _PyObject_CallNoArgs((PyObject*)&PyFrozenSet_Type);
1297             if (v == NULL)
1298                 break;
1299             R_REF(v);
1300             retval = v;
1301         }
1302         else {
1303             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1304             if (type == TYPE_SET) {
1305                 R_REF(v);
1306             } else {
1307                 /* must use delayed registration of frozensets because they must
1308                  * be init with a refcount of 1
1309                  */
1310                 idx = r_ref_reserve(flag, p);
1311                 if (idx < 0)
1312                     Py_CLEAR(v); /* signal error */
1313             }
1314             if (v == NULL)
1315                 break;
1316 
1317             for (i = 0; i < n; i++) {
1318                 v2 = r_object(p);
1319                 if ( v2 == NULL ) {
1320                     if (!PyErr_Occurred())
1321                         PyErr_SetString(PyExc_TypeError,
1322                             "NULL object in marshal data for set");
1323                     Py_DECREF(v);
1324                     v = NULL;
1325                     break;
1326                 }
1327                 if (PySet_Add(v, v2) == -1) {
1328                     Py_DECREF(v);
1329                     Py_DECREF(v2);
1330                     v = NULL;
1331                     break;
1332                 }
1333                 Py_DECREF(v2);
1334             }
1335             if (type != TYPE_SET)
1336                 v = r_ref_insert(v, idx, flag, p);
1337             retval = v;
1338         }
1339         break;
1340 
1341     case TYPE_CODE:
1342         {
1343             int argcount;
1344             int posonlyargcount;
1345             int kwonlyargcount;
1346             int stacksize;
1347             int flags;
1348             PyObject *code = NULL;
1349             PyObject *consts = NULL;
1350             PyObject *names = NULL;
1351             PyObject *localsplusnames = NULL;
1352             PyObject *localspluskinds = NULL;
1353             PyObject *filename = NULL;
1354             PyObject *name = NULL;
1355             PyObject *qualname = NULL;
1356             int firstlineno;
1357             PyObject *linetable = NULL;
1358             PyObject* endlinetable = NULL;
1359             PyObject* columntable = NULL;
1360             PyObject *exceptiontable = NULL;
1361 
1362             idx = r_ref_reserve(flag, p);
1363             if (idx < 0)
1364                 break;
1365 
1366             v = NULL;
1367 
1368             /* XXX ignore long->int overflows for now */
1369             argcount = (int)r_long(p);
1370             if (PyErr_Occurred())
1371                 goto code_error;
1372             posonlyargcount = (int)r_long(p);
1373             if (PyErr_Occurred()) {
1374                 goto code_error;
1375             }
1376             kwonlyargcount = (int)r_long(p);
1377             if (PyErr_Occurred())
1378                 goto code_error;
1379             stacksize = (int)r_long(p);
1380             if (PyErr_Occurred())
1381                 goto code_error;
1382             flags = (int)r_long(p);
1383             if (PyErr_Occurred())
1384                 goto code_error;
1385             code = r_object(p);
1386             if (code == NULL)
1387                 goto code_error;
1388             consts = r_object(p);
1389             if (consts == NULL)
1390                 goto code_error;
1391             names = r_object(p);
1392             if (names == NULL)
1393                 goto code_error;
1394             localsplusnames = r_object(p);
1395             if (localsplusnames == NULL)
1396                 goto code_error;
1397             localspluskinds = r_object(p);
1398             if (localspluskinds == NULL)
1399                 goto code_error;
1400             filename = r_object(p);
1401             if (filename == NULL)
1402                 goto code_error;
1403             name = r_object(p);
1404             if (name == NULL)
1405                 goto code_error;
1406             qualname = r_object(p);
1407             if (qualname == NULL)
1408                 goto code_error;
1409             firstlineno = (int)r_long(p);
1410             if (firstlineno == -1 && PyErr_Occurred())
1411                 break;
1412             linetable = r_object(p);
1413             if (linetable == NULL)
1414                 goto code_error;
1415             endlinetable = r_object(p);
1416             if (endlinetable == NULL)
1417                 goto code_error;
1418             columntable = r_object(p);
1419             if (columntable == NULL)
1420                 goto code_error;
1421             exceptiontable = r_object(p);
1422             if (exceptiontable == NULL)
1423                 goto code_error;
1424 
1425             struct _PyCodeConstructor con = {
1426                 .filename = filename,
1427                 .name = name,
1428                 .qualname = qualname,
1429                 .flags = flags,
1430 
1431                 .code = code,
1432                 .firstlineno = firstlineno,
1433                 .linetable = linetable,
1434                 .endlinetable = endlinetable,
1435                 .columntable = columntable,
1436 
1437                 .consts = consts,
1438                 .names = names,
1439 
1440                 .localsplusnames = localsplusnames,
1441                 .localspluskinds = localspluskinds,
1442 
1443                 .argcount = argcount,
1444                 .posonlyargcount = posonlyargcount,
1445                 .kwonlyargcount = kwonlyargcount,
1446 
1447                 .stacksize = stacksize,
1448 
1449                 .exceptiontable = exceptiontable,
1450             };
1451 
1452             if (_PyCode_Validate(&con) < 0) {
1453                 goto code_error;
1454             }
1455 
1456             v = (PyObject *)_PyCode_New(&con);
1457             if (v == NULL) {
1458                 goto code_error;
1459             }
1460 
1461             v = r_ref_insert(v, idx, flag, p);
1462 
1463           code_error:
1464             Py_XDECREF(code);
1465             Py_XDECREF(consts);
1466             Py_XDECREF(names);
1467             Py_XDECREF(localsplusnames);
1468             Py_XDECREF(localspluskinds);
1469             Py_XDECREF(filename);
1470             Py_XDECREF(name);
1471             Py_XDECREF(qualname);
1472             Py_XDECREF(linetable);
1473             Py_XDECREF(endlinetable);
1474             Py_XDECREF(columntable);
1475             Py_XDECREF(exceptiontable);
1476         }
1477         retval = v;
1478         break;
1479 
1480     case TYPE_REF:
1481         n = r_long(p);
1482         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1483             if (n == -1 && PyErr_Occurred())
1484                 break;
1485             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1486             break;
1487         }
1488         v = PyList_GET_ITEM(p->refs, n);
1489         if (v == Py_None) {
1490             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1491             break;
1492         }
1493         Py_INCREF(v);
1494         retval = v;
1495         break;
1496 
1497     default:
1498         /* Bogus data got written, which isn't ideal.
1499            This will let you keep working and recover. */
1500         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1501         break;
1502 
1503     }
1504     p->depth--;
1505     return retval;
1506 }
1507 
1508 static PyObject *
read_object(RFILE * p)1509 read_object(RFILE *p)
1510 {
1511     PyObject *v;
1512     if (PyErr_Occurred()) {
1513         fprintf(stderr, "XXX readobject called with exception set\n");
1514         return NULL;
1515     }
1516     if (p->ptr && p->end) {
1517         if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1518             return NULL;
1519         }
1520     } else if (p->fp || p->readable) {
1521         if (PySys_Audit("marshal.load", NULL) < 0) {
1522             return NULL;
1523         }
1524     }
1525     v = r_object(p);
1526     if (v == NULL && !PyErr_Occurred())
1527         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1528     return v;
1529 }
1530 
1531 int
PyMarshal_ReadShortFromFile(FILE * fp)1532 PyMarshal_ReadShortFromFile(FILE *fp)
1533 {
1534     RFILE rf;
1535     int res;
1536     assert(fp);
1537     rf.readable = NULL;
1538     rf.fp = fp;
1539     rf.end = rf.ptr = NULL;
1540     rf.buf = NULL;
1541     res = r_short(&rf);
1542     if (rf.buf != NULL)
1543         PyMem_Free(rf.buf);
1544     return res;
1545 }
1546 
1547 long
PyMarshal_ReadLongFromFile(FILE * fp)1548 PyMarshal_ReadLongFromFile(FILE *fp)
1549 {
1550     RFILE rf;
1551     long res;
1552     rf.fp = fp;
1553     rf.readable = NULL;
1554     rf.ptr = rf.end = NULL;
1555     rf.buf = NULL;
1556     res = r_long(&rf);
1557     if (rf.buf != NULL)
1558         PyMem_Free(rf.buf);
1559     return res;
1560 }
1561 
1562 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1563 static off_t
getfilesize(FILE * fp)1564 getfilesize(FILE *fp)
1565 {
1566     struct _Py_stat_struct st;
1567     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1568         return -1;
1569 #if SIZEOF_OFF_T == 4
1570     else if (st.st_size >= INT_MAX)
1571         return (off_t)INT_MAX;
1572 #endif
1573     else
1574         return (off_t)st.st_size;
1575 }
1576 
1577 /* If we can get the size of the file up-front, and it's reasonably small,
1578  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1579  * than reading a byte at a time from file; speeds .pyc imports.
1580  * CAUTION:  since this may read the entire remainder of the file, don't
1581  * call it unless you know you're done with the file.
1582  */
1583 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1584 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1585 {
1586 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1587 #define REASONABLE_FILE_LIMIT (1L << 18)
1588     off_t filesize;
1589     filesize = getfilesize(fp);
1590     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1591         char* pBuf = (char *)PyMem_Malloc(filesize);
1592         if (pBuf != NULL) {
1593             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1594             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1595             PyMem_Free(pBuf);
1596             return v;
1597         }
1598 
1599     }
1600     /* We don't have fstat, or we do but the file is larger than
1601      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1602      */
1603     return PyMarshal_ReadObjectFromFile(fp);
1604 
1605 #undef REASONABLE_FILE_LIMIT
1606 }
1607 
1608 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1609 PyMarshal_ReadObjectFromFile(FILE *fp)
1610 {
1611     RFILE rf;
1612     PyObject *result;
1613     rf.fp = fp;
1614     rf.readable = NULL;
1615     rf.depth = 0;
1616     rf.ptr = rf.end = NULL;
1617     rf.buf = NULL;
1618     rf.refs = PyList_New(0);
1619     if (rf.refs == NULL)
1620         return NULL;
1621     result = read_object(&rf);
1622     Py_DECREF(rf.refs);
1623     if (rf.buf != NULL)
1624         PyMem_Free(rf.buf);
1625     return result;
1626 }
1627 
1628 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1629 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1630 {
1631     RFILE rf;
1632     PyObject *result;
1633     rf.fp = NULL;
1634     rf.readable = NULL;
1635     rf.ptr = str;
1636     rf.end = str + len;
1637     rf.buf = NULL;
1638     rf.depth = 0;
1639     rf.refs = PyList_New(0);
1640     if (rf.refs == NULL)
1641         return NULL;
1642     result = read_object(&rf);
1643     Py_DECREF(rf.refs);
1644     if (rf.buf != NULL)
1645         PyMem_Free(rf.buf);
1646     return result;
1647 }
1648 
1649 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1650 PyMarshal_WriteObjectToString(PyObject *x, int version)
1651 {
1652     WFILE wf;
1653 
1654     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1655         return NULL;
1656     }
1657     memset(&wf, 0, sizeof(wf));
1658     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1659     if (wf.str == NULL)
1660         return NULL;
1661     wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1662     wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1663     wf.error = WFERR_OK;
1664     wf.version = version;
1665     if (w_init_refs(&wf, version)) {
1666         Py_DECREF(wf.str);
1667         return NULL;
1668     }
1669     w_object(x, &wf);
1670     w_clear_refs(&wf);
1671     if (wf.str != NULL) {
1672         const char *base = PyBytes_AS_STRING(wf.str);
1673         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1674             return NULL;
1675     }
1676     if (wf.error != WFERR_OK) {
1677         Py_XDECREF(wf.str);
1678         if (wf.error == WFERR_NOMEMORY)
1679             PyErr_NoMemory();
1680         else
1681             PyErr_SetString(PyExc_ValueError,
1682               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1683                :"object too deeply nested to marshal");
1684         return NULL;
1685     }
1686     return wf.str;
1687 }
1688 
1689 /* And an interface for Python programs... */
1690 /*[clinic input]
1691 marshal.dump
1692 
1693     value: object
1694         Must be a supported type.
1695     file: object
1696         Must be a writeable binary file.
1697     version: int(c_default="Py_MARSHAL_VERSION") = version
1698         Indicates the data format that dump should use.
1699     /
1700 
1701 Write the value on the open file.
1702 
1703 If the value has (or contains an object that has) an unsupported type, a
1704 ValueError exception is raised - but garbage data will also be written
1705 to the file. The object will not be properly read back by load().
1706 [clinic start generated code]*/
1707 
1708 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1709 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1710                   int version)
1711 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1712 {
1713     /* XXX Quick hack -- need to do this differently */
1714     PyObject *s;
1715     PyObject *res;
1716     _Py_IDENTIFIER(write);
1717 
1718     s = PyMarshal_WriteObjectToString(value, version);
1719     if (s == NULL)
1720         return NULL;
1721     res = _PyObject_CallMethodIdOneArg(file, &PyId_write, s);
1722     Py_DECREF(s);
1723     return res;
1724 }
1725 
1726 /*[clinic input]
1727 marshal.load
1728 
1729     file: object
1730         Must be readable binary file.
1731     /
1732 
1733 Read one value from the open file and return it.
1734 
1735 If no valid value is read (e.g. because the data has a different Python
1736 version's incompatible marshal format), raise EOFError, ValueError or
1737 TypeError.
1738 
1739 Note: If an object containing an unsupported type was marshalled with
1740 dump(), load() will substitute None for the unmarshallable type.
1741 [clinic start generated code]*/
1742 
1743 static PyObject *
marshal_load(PyObject * module,PyObject * file)1744 marshal_load(PyObject *module, PyObject *file)
1745 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1746 {
1747     PyObject *data, *result;
1748     _Py_IDENTIFIER(read);
1749     RFILE rf;
1750 
1751     /*
1752      * Make a call to the read method, but read zero bytes.
1753      * This is to ensure that the object passed in at least
1754      * has a read method which returns bytes.
1755      * This can be removed if we guarantee good error handling
1756      * for r_string()
1757      */
1758     data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1759     if (data == NULL)
1760         return NULL;
1761     if (!PyBytes_Check(data)) {
1762         PyErr_Format(PyExc_TypeError,
1763                      "file.read() returned not bytes but %.100s",
1764                      Py_TYPE(data)->tp_name);
1765         result = NULL;
1766     }
1767     else {
1768         rf.depth = 0;
1769         rf.fp = NULL;
1770         rf.readable = file;
1771         rf.ptr = rf.end = NULL;
1772         rf.buf = NULL;
1773         if ((rf.refs = PyList_New(0)) != NULL) {
1774             result = read_object(&rf);
1775             Py_DECREF(rf.refs);
1776             if (rf.buf != NULL)
1777                 PyMem_Free(rf.buf);
1778         } else
1779             result = NULL;
1780     }
1781     Py_DECREF(data);
1782     return result;
1783 }
1784 
1785 /*[clinic input]
1786 marshal.dumps
1787 
1788     value: object
1789         Must be a supported type.
1790     version: int(c_default="Py_MARSHAL_VERSION") = version
1791         Indicates the data format that dumps should use.
1792     /
1793 
1794 Return the bytes object that would be written to a file by dump(value, file).
1795 
1796 Raise a ValueError exception if value has (or contains an object that has) an
1797 unsupported type.
1798 [clinic start generated code]*/
1799 
1800 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1801 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1802 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1803 {
1804     return PyMarshal_WriteObjectToString(value, version);
1805 }
1806 
1807 /*[clinic input]
1808 marshal.loads
1809 
1810     bytes: Py_buffer
1811     /
1812 
1813 Convert the bytes-like object to a value.
1814 
1815 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1816 bytes in the input are ignored.
1817 [clinic start generated code]*/
1818 
1819 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1820 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1821 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1822 {
1823     RFILE rf;
1824     char *s = bytes->buf;
1825     Py_ssize_t n = bytes->len;
1826     PyObject* result;
1827     rf.fp = NULL;
1828     rf.readable = NULL;
1829     rf.ptr = s;
1830     rf.end = s + n;
1831     rf.depth = 0;
1832     if ((rf.refs = PyList_New(0)) == NULL)
1833         return NULL;
1834     result = read_object(&rf);
1835     Py_DECREF(rf.refs);
1836     return result;
1837 }
1838 
/* Method table of the marshal module.  The *_METHODDEF entries are macros
   expected to come from the Argument Clinic header included at the top of
   this file (clinic/marshal.c.h). */
static PyMethodDef marshal_methods[] = {
    MARSHAL_DUMP_METHODDEF
    MARSHAL_LOAD_METHODDEF
    MARSHAL_DUMPS_METHODDEF
    MARSHAL_LOADS_METHODDEF
    {NULL,              NULL}           /* sentinel */
};
1846 
1847 
/* Docstring of the marshal module; installed through the m_doc field of
   the module definition below. */
PyDoc_STRVAR(module_doc,
"This module contains functions that can read and write Python values in\n\
a binary format. The format is specific to Python, but independent of\n\
machine architecture issues.\n\
\n\
Not all Python object types are supported; in general, only objects\n\
whose value is independent from a particular invocation of Python can be\n\
written and read by this module. The following types are supported:\n\
None, integers, floating point numbers, strings, bytes, bytearrays,\n\
tuples, lists, sets, dictionaries, and code objects, where it\n\
should be understood that tuples, lists and dictionaries are only\n\
supported as long as the values contained therein are themselves\n\
supported; and recursive lists and dictionaries should not be written\n\
(they will cause infinite loops).\n\
\n\
Variables:\n\
\n\
version -- indicates the format that the module uses. Version 0 is the\n\
    historical format, version 1 shares interned strings and version 2\n\
    uses a binary format for floating point numbers.\n\
    Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\
dump() -- write value to a file\n\
load() -- read value from a file\n\
dumps() -- marshal value as a bytes object\n\
loads() -- read value from a bytes-like object");
1876 
1877 
1878 static int
marshal_module_exec(PyObject * mod)1879 marshal_module_exec(PyObject *mod)
1880 {
1881     if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1882         return -1;
1883     }
1884     return 0;
1885 }
1886 
/* Module slots: the only one registered is the exec slot, which runs
   marshal_module_exec(). */
static PyModuleDef_Slot marshalmodule_slots[] = {
    {Py_mod_exec, marshal_module_exec},
    {0, NULL}   /* sentinel */
};
1891 
/* Definition of the "marshal" module: ties together the docstring, the
   method table and the slots defined above. */
static struct PyModuleDef marshalmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "marshal",
    .m_doc = module_doc,
    .m_methods = marshal_methods,
    .m_slots = marshalmodule_slots,
};
1899 
1900 PyMODINIT_FUNC
PyMarshal_Init(void)1901 PyMarshal_Init(void)
1902 {
1903     return PyModuleDef_Init(&marshalmodule);
1904 }
1905