1 
2 /* Write Python objects to files and read them back.
3    This is primarily intended for writing and reading compiled Python code,
4    even though dicts, lists, sets and frozensets, not commonly seen in
5    code objects, are supported.
6    Version 3 of this protocol properly supports circular links
7    and sharing. */
8 
9 #define PY_SSIZE_T_CLEAN
10 
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "../Modules/hashtable.h"
16 
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21 
22 #include "clinic/marshal.c.h"
23 
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing: w_object() records
 * WFERR_NESTEDTOODEEP and r_object() raises ValueError.
 *
 * The limit is lower on Windows than on other platforms.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow. We reduce the maximum depth for all Windows
 * releases to protect against this.
 *
 * NOTE(review): the line below appears to be the earlier, debug-builds-only
 * guard, kept here for reference:
 *     #if defined(MS_WINDOWS) && defined(_DEBUG)
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
40 
/* One-byte type codes.  Every marshalled object starts with one of these
   codes (see w_object() and w_complex_object()), optionally or'ed with
   FLAG_REF. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact codes; the writer only emits these when the format version
   is >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'
74 
/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0              /* no error recorded */
#define WFERR_UNMARSHALLABLE 1  /* object (or part of it) cannot be written */
#define WFERR_NESTEDTOODEEP 2   /* nesting exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY 3        /* an allocation or buffer growth failed */
79 
/* State of one marshal write operation.  Output goes either to a C stream
   (fp != NULL), buffered through buf, or into a bytes object (str) that
   w_reserve() grows on demand. */
typedef struct {
    FILE *fp;           /* output stream; w_flush() drains buf into it */
    int error;  /* see WFERR_* values */
    int depth;          /* current w_object() nesting level */
    PyObject *str;      /* bytes object resized by w_reserve() when fp is NULL */
    char *ptr;          /* next byte to write; NULL after a failed resize */
    char *end;          /* one past the last usable byte of the buffer */
    char *buf;          /* start of the buffer */
    _Py_hashtable_t *hashtable; /* object -> reference index; created by
                                   w_init_refs() for version >= 3 */
    int version;        /* marshal format version being written */
} WFILE;
91 
/* Append the single byte c to writer p.  If the buffer is full, w_reserve()
   is asked for one more byte; when that fails the byte is silently dropped
   (w_reserve() is responsible for recording the failure).
   Note that p is evaluated several times, and c is only evaluated when the
   byte is actually stored. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
96 
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100     assert(p->fp != NULL);
101     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102     p->ptr = p->buf;
103 }
104 
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108     Py_ssize_t pos, size, delta;
109     if (p->ptr == NULL)
110         return 0; /* An error already occurred */
111     if (p->fp != NULL) {
112         w_flush(p);
113         return needed <= p->end - p->ptr;
114     }
115     assert(p->str != NULL);
116     pos = p->ptr - p->buf;
117     size = PyBytes_Size(p->str);
118     if (size > 16*1024*1024)
119         delta = (size >> 3);            /* 12.5% overallocation */
120     else
121         delta = size + 1024;
122     delta = Py_MAX(delta, needed);
123     if (delta > PY_SSIZE_T_MAX - size) {
124         p->error = WFERR_NOMEMORY;
125         return 0;
126     }
127     size += delta;
128     if (_PyBytes_Resize(&p->str, size) != 0) {
129         p->ptr = p->buf = p->end = NULL;
130         return 0;
131     }
132     else {
133         p->buf = PyBytes_AS_STRING(p->str);
134         p->ptr = p->buf + pos;
135         p->end = p->buf + size;
136         return 1;
137     }
138 }
139 
140 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)141 w_string(const char *s, Py_ssize_t n, WFILE *p)
142 {
143     Py_ssize_t m;
144     if (!n || p->ptr == NULL)
145         return;
146     m = p->end - p->ptr;
147     if (p->fp != NULL) {
148         if (n <= m) {
149             memcpy(p->ptr, s, n);
150             p->ptr += n;
151         }
152         else {
153             w_flush(p);
154             fwrite(s, 1, n, p->fp);
155         }
156     }
157     else {
158         if (n <= m || w_reserve(p, n - m)) {
159             memcpy(p->ptr, s, n);
160             p->ptr += n;
161         }
162     }
163 }
164 
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168     w_byte((char)( x      & 0xff), p);
169     w_byte((char)((x>> 8) & 0xff), p);
170 }
171 
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175     w_byte((char)( x      & 0xff), p);
176     w_byte((char)((x>> 8) & 0xff), p);
177     w_byte((char)((x>>16) & 0xff), p);
178     w_byte((char)((x>>24) & 0xff), p);
179 }
180 
/* Largest length/count the writer accepts for a 4-byte size field. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the length n with w_long().
   Where size_t is wider than 4 bytes, a length above SIZE32_MAX is rejected
   instead: p->depth is decremented, p->error is set to WFERR_UNMARSHALLABLE
   and the macro executes `return;` -- i.e. it leaves the *calling* function,
   so it can only be used inside functions returning void.
   Otherwise W_SIZE is simply an alias for w_long. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
195 
196 static void
w_pstring(const char * s,Py_ssize_t n,WFILE * p)197 w_pstring(const char *s, Py_ssize_t n, WFILE *p)
198 {
199         W_SIZE(n, p);
200         w_string(s, n, p);
201 }
202 
203 static void
w_short_pstring(const char * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
205 {
206     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207     w_string(s, n, p);
208 }
209 
/* We assume that Python ints are stored internally in base some power of
   2**15; for the sake of portability we'll always read and write them in base
   exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15     /* bits per marshalled digit */
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of marshalled digits needed to hold one internal digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* W_TYPE(t, p): write type code t combined with the variable `flag`, which
   is not a macro argument and must therefore be in scope at the point of
   use. */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
225 
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229     Py_ssize_t i, j, n, l;
230     digit d;
231 
232     W_TYPE(TYPE_LONG, p);
233     if (Py_SIZE(ob) == 0) {
234         w_long((long)0, p);
235         return;
236     }
237 
238     /* set l to number of base PyLong_MARSHAL_BASE digits */
239     n = Py_ABS(Py_SIZE(ob));
240     l = (n-1) * PyLong_MARSHAL_RATIO;
241     d = ob->ob_digit[n-1];
242     assert(d != 0); /* a PyLong is always normalized */
243     do {
244         d >>= PyLong_MARSHAL_SHIFT;
245         l++;
246     } while (d != 0);
247     if (l > SIZE32_MAX) {
248         p->depth--;
249         p->error = WFERR_UNMARSHALLABLE;
250         return;
251     }
252     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253 
254     for (i=0; i < n-1; i++) {
255         d = ob->ob_digit[i];
256         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257             w_short(d & PyLong_MARSHAL_MASK, p);
258             d >>= PyLong_MARSHAL_SHIFT;
259         }
260         assert (d == 0);
261     }
262     d = ob->ob_digit[n-1];
263     do {
264         w_short(d & PyLong_MARSHAL_MASK, p);
265         d >>= PyLong_MARSHAL_SHIFT;
266     } while (d != 0);
267 }
268 
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272     unsigned char buf[8];
273     if (_PyFloat_Pack8(v, buf, 1) < 0) {
274         p->error = WFERR_UNMARSHALLABLE;
275         return;
276     }
277     w_string((const char *)buf, 8, p);
278 }
279 
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283     int n;
284     char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
285     if (!buf) {
286         p->error = WFERR_NOMEMORY;
287         return;
288     }
289     n = (int)strlen(buf);
290     w_byte(n, p);
291     w_string(buf, n, p);
292     PyMem_Free(buf);
293 }
294 
295 static int
w_ref(PyObject * v,char * flag,WFILE * p)296 w_ref(PyObject *v, char *flag, WFILE *p)
297 {
298     _Py_hashtable_entry_t *entry;
299     int w;
300 
301     if (p->version < 3 || p->hashtable == NULL)
302         return 0; /* not writing object references */
303 
304     /* if it has only one reference, it definitely isn't shared */
305     if (Py_REFCNT(v) == 1)
306         return 0;
307 
308     entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
309     if (entry != NULL) {
310         /* write the reference index to the stream */
311         _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
312         /* we don't store "long" indices in the dict */
313         assert(0 <= w && w <= 0x7fffffff);
314         w_byte(TYPE_REF, p);
315         w_long(w, p);
316         return 1;
317     } else {
318         size_t s = p->hashtable->entries;
319         /* we don't support long indices */
320         if (s >= 0x7fffffff) {
321             PyErr_SetString(PyExc_ValueError, "too many objects");
322             goto err;
323         }
324         w = (int)s;
325         Py_INCREF(v);
326         if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
327             Py_DECREF(v);
328             goto err;
329         }
330         *flag |= FLAG_REF;
331         return 0;
332     }
333 err:
334     p->error = WFERR_UNMARSHALLABLE;
335     return 1;
336 }
337 
338 static void
339 w_complex_object(PyObject *v, char flag, WFILE *p);
340 
341 static void
w_object(PyObject * v,WFILE * p)342 w_object(PyObject *v, WFILE *p)
343 {
344     char flag = '\0';
345 
346     p->depth++;
347 
348     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
349         p->error = WFERR_NESTEDTOODEEP;
350     }
351     else if (v == NULL) {
352         w_byte(TYPE_NULL, p);
353     }
354     else if (v == Py_None) {
355         w_byte(TYPE_NONE, p);
356     }
357     else if (v == PyExc_StopIteration) {
358         w_byte(TYPE_STOPITER, p);
359     }
360     else if (v == Py_Ellipsis) {
361         w_byte(TYPE_ELLIPSIS, p);
362     }
363     else if (v == Py_False) {
364         w_byte(TYPE_FALSE, p);
365     }
366     else if (v == Py_True) {
367         w_byte(TYPE_TRUE, p);
368     }
369     else if (!w_ref(v, &flag, p))
370         w_complex_object(v, flag, p);
371 
372     p->depth--;
373 }
374 
375 static void
w_complex_object(PyObject * v,char flag,WFILE * p)376 w_complex_object(PyObject *v, char flag, WFILE *p)
377 {
378     Py_ssize_t i, n;
379 
380     if (PyLong_CheckExact(v)) {
381         long x = PyLong_AsLong(v);
382         if ((x == -1)  && PyErr_Occurred()) {
383             PyLongObject *ob = (PyLongObject *)v;
384             PyErr_Clear();
385             w_PyLong(ob, flag, p);
386         }
387         else {
388 #if SIZEOF_LONG > 4
389             long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
390             if (y && y != -1) {
391                 /* Too large for TYPE_INT */
392                 w_PyLong((PyLongObject*)v, flag, p);
393             }
394             else
395 #endif
396             {
397                 W_TYPE(TYPE_INT, p);
398                 w_long(x, p);
399             }
400         }
401     }
402     else if (PyFloat_CheckExact(v)) {
403         if (p->version > 1) {
404             W_TYPE(TYPE_BINARY_FLOAT, p);
405             w_float_bin(PyFloat_AS_DOUBLE(v), p);
406         }
407         else {
408             W_TYPE(TYPE_FLOAT, p);
409             w_float_str(PyFloat_AS_DOUBLE(v), p);
410         }
411     }
412     else if (PyComplex_CheckExact(v)) {
413         if (p->version > 1) {
414             W_TYPE(TYPE_BINARY_COMPLEX, p);
415             w_float_bin(PyComplex_RealAsDouble(v), p);
416             w_float_bin(PyComplex_ImagAsDouble(v), p);
417         }
418         else {
419             W_TYPE(TYPE_COMPLEX, p);
420             w_float_str(PyComplex_RealAsDouble(v), p);
421             w_float_str(PyComplex_ImagAsDouble(v), p);
422         }
423     }
424     else if (PyBytes_CheckExact(v)) {
425         W_TYPE(TYPE_STRING, p);
426         w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
427     }
428     else if (PyUnicode_CheckExact(v)) {
429         if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
430             int is_short = PyUnicode_GET_LENGTH(v) < 256;
431             if (is_short) {
432                 if (PyUnicode_CHECK_INTERNED(v))
433                     W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
434                 else
435                     W_TYPE(TYPE_SHORT_ASCII, p);
436                 w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
437                                 PyUnicode_GET_LENGTH(v), p);
438             }
439             else {
440                 if (PyUnicode_CHECK_INTERNED(v))
441                     W_TYPE(TYPE_ASCII_INTERNED, p);
442                 else
443                     W_TYPE(TYPE_ASCII, p);
444                 w_pstring((char *) PyUnicode_1BYTE_DATA(v),
445                           PyUnicode_GET_LENGTH(v), p);
446             }
447         }
448         else {
449             PyObject *utf8;
450             utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
451             if (utf8 == NULL) {
452                 p->depth--;
453                 p->error = WFERR_UNMARSHALLABLE;
454                 return;
455             }
456             if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
457                 W_TYPE(TYPE_INTERNED, p);
458             else
459                 W_TYPE(TYPE_UNICODE, p);
460             w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
461             Py_DECREF(utf8);
462         }
463     }
464     else if (PyTuple_CheckExact(v)) {
465         n = PyTuple_Size(v);
466         if (p->version >= 4 && n < 256) {
467             W_TYPE(TYPE_SMALL_TUPLE, p);
468             w_byte((unsigned char)n, p);
469         }
470         else {
471             W_TYPE(TYPE_TUPLE, p);
472             W_SIZE(n, p);
473         }
474         for (i = 0; i < n; i++) {
475             w_object(PyTuple_GET_ITEM(v, i), p);
476         }
477     }
478     else if (PyList_CheckExact(v)) {
479         W_TYPE(TYPE_LIST, p);
480         n = PyList_GET_SIZE(v);
481         W_SIZE(n, p);
482         for (i = 0; i < n; i++) {
483             w_object(PyList_GET_ITEM(v, i), p);
484         }
485     }
486     else if (PyDict_CheckExact(v)) {
487         Py_ssize_t pos;
488         PyObject *key, *value;
489         W_TYPE(TYPE_DICT, p);
490         /* This one is NULL object terminated! */
491         pos = 0;
492         while (PyDict_Next(v, &pos, &key, &value)) {
493             w_object(key, p);
494             w_object(value, p);
495         }
496         w_object((PyObject *)NULL, p);
497     }
498     else if (PyAnySet_CheckExact(v)) {
499         PyObject *value, *it;
500 
501         if (PyObject_TypeCheck(v, &PySet_Type))
502             W_TYPE(TYPE_SET, p);
503         else
504             W_TYPE(TYPE_FROZENSET, p);
505         n = PyObject_Size(v);
506         if (n == -1) {
507             p->depth--;
508             p->error = WFERR_UNMARSHALLABLE;
509             return;
510         }
511         W_SIZE(n, p);
512         it = PyObject_GetIter(v);
513         if (it == NULL) {
514             p->depth--;
515             p->error = WFERR_UNMARSHALLABLE;
516             return;
517         }
518         while ((value = PyIter_Next(it)) != NULL) {
519             w_object(value, p);
520             Py_DECREF(value);
521         }
522         Py_DECREF(it);
523         if (PyErr_Occurred()) {
524             p->depth--;
525             p->error = WFERR_UNMARSHALLABLE;
526             return;
527         }
528     }
529     else if (PyCode_Check(v)) {
530         PyCodeObject *co = (PyCodeObject *)v;
531         W_TYPE(TYPE_CODE, p);
532         w_long(co->co_argcount, p);
533         w_long(co->co_posonlyargcount, p);
534         w_long(co->co_kwonlyargcount, p);
535         w_long(co->co_nlocals, p);
536         w_long(co->co_stacksize, p);
537         w_long(co->co_flags, p);
538         w_object(co->co_code, p);
539         w_object(co->co_consts, p);
540         w_object(co->co_names, p);
541         w_object(co->co_varnames, p);
542         w_object(co->co_freevars, p);
543         w_object(co->co_cellvars, p);
544         w_object(co->co_filename, p);
545         w_object(co->co_name, p);
546         w_long(co->co_firstlineno, p);
547         w_object(co->co_lnotab, p);
548     }
549     else if (PyObject_CheckBuffer(v)) {
550         /* Write unknown bytes-like objects as a bytes object */
551         Py_buffer view;
552         if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
553             w_byte(TYPE_UNKNOWN, p);
554             p->depth--;
555             p->error = WFERR_UNMARSHALLABLE;
556             return;
557         }
558         W_TYPE(TYPE_STRING, p);
559         w_pstring(view.buf, view.len, p);
560         PyBuffer_Release(&view);
561     }
562     else {
563         W_TYPE(TYPE_UNKNOWN, p);
564         p->error = WFERR_UNMARSHALLABLE;
565     }
566 }
567 
568 static int
w_init_refs(WFILE * wf,int version)569 w_init_refs(WFILE *wf, int version)
570 {
571     if (version >= 3) {
572         wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
573                                           _Py_hashtable_hash_ptr,
574                                           _Py_hashtable_compare_direct);
575         if (wf->hashtable == NULL) {
576             PyErr_NoMemory();
577             return -1;
578         }
579     }
580     return 0;
581 }
582 
583 static int
w_decref_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry,void * Py_UNUSED (data))584 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
585                void *Py_UNUSED(data))
586 {
587     PyObject *entry_key;
588 
589     _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key);
590     Py_XDECREF(entry_key);
591     return 0;
592 }
593 
594 static void
w_clear_refs(WFILE * wf)595 w_clear_refs(WFILE *wf)
596 {
597     if (wf->hashtable != NULL) {
598         _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
599         _Py_hashtable_destroy(wf->hashtable);
600     }
601 }
602 
603 /* version currently has no effect for writing ints. */
604 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)605 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
606 {
607     char buf[4];
608     WFILE wf;
609     memset(&wf, 0, sizeof(wf));
610     wf.fp = fp;
611     wf.ptr = wf.buf = buf;
612     wf.end = wf.ptr + sizeof(buf);
613     wf.error = WFERR_OK;
614     wf.version = version;
615     w_long(x, &wf);
616     w_flush(&wf);
617 }
618 
619 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)620 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
621 {
622     char buf[BUFSIZ];
623     WFILE wf;
624     memset(&wf, 0, sizeof(wf));
625     wf.fp = fp;
626     wf.ptr = wf.buf = buf;
627     wf.end = wf.ptr + sizeof(buf);
628     wf.error = WFERR_OK;
629     wf.version = version;
630     if (w_init_refs(&wf, version))
631         return; /* caller mush check PyErr_Occurred() */
632     w_object(x, &wf);
633     w_clear_refs(&wf);
634     w_flush(&wf);
635 }
636 
/* State of one marshal read operation.  r_string() and r_byte() read from
   in-memory data when ptr is non-NULL, otherwise from `readable` if it is
   set, otherwise from the C stream fp. */
typedef struct {
    FILE *fp;            /* C stream, read with fread()/getc() */
    int depth;           /* current r_object() nesting level */
    PyObject *readable;  /* Stream-like object being read from */
    char *ptr;           /* read cursor into in-memory data, or NULL */
    char *end;           /* end of the in-memory data */
    char *buf;           /* scratch buffer (re)allocated by r_string() */
    Py_ssize_t buf_size; /* allocated size of buf */
    PyObject *refs;  /* a list */
} RFILE;
647 
648 static const char *
r_string(Py_ssize_t n,RFILE * p)649 r_string(Py_ssize_t n, RFILE *p)
650 {
651     Py_ssize_t read = -1;
652 
653     if (p->ptr != NULL) {
654         /* Fast path for loads() */
655         char *res = p->ptr;
656         Py_ssize_t left = p->end - p->ptr;
657         if (left < n) {
658             PyErr_SetString(PyExc_EOFError,
659                             "marshal data too short");
660             return NULL;
661         }
662         p->ptr += n;
663         return res;
664     }
665     if (p->buf == NULL) {
666         p->buf = PyMem_MALLOC(n);
667         if (p->buf == NULL) {
668             PyErr_NoMemory();
669             return NULL;
670         }
671         p->buf_size = n;
672     }
673     else if (p->buf_size < n) {
674         char *tmp = PyMem_REALLOC(p->buf, n);
675         if (tmp == NULL) {
676             PyErr_NoMemory();
677             return NULL;
678         }
679         p->buf = tmp;
680         p->buf_size = n;
681     }
682 
683     if (!p->readable) {
684         assert(p->fp != NULL);
685         read = fread(p->buf, 1, n, p->fp);
686     }
687     else {
688         _Py_IDENTIFIER(readinto);
689         PyObject *res, *mview;
690         Py_buffer buf;
691 
692         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
693             return NULL;
694         mview = PyMemoryView_FromBuffer(&buf);
695         if (mview == NULL)
696             return NULL;
697 
698         res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
699         if (res != NULL) {
700             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
701             Py_DECREF(res);
702         }
703     }
704     if (read != n) {
705         if (!PyErr_Occurred()) {
706             if (read > n)
707                 PyErr_Format(PyExc_ValueError,
708                              "read() returned too much data: "
709                              "%zd bytes requested, %zd returned",
710                              n, read);
711             else
712                 PyErr_SetString(PyExc_EOFError,
713                                 "EOF read where not expected");
714         }
715         return NULL;
716     }
717     return p->buf;
718 }
719 
720 static int
r_byte(RFILE * p)721 r_byte(RFILE *p)
722 {
723     int c = EOF;
724 
725     if (p->ptr != NULL) {
726         if (p->ptr < p->end)
727             c = (unsigned char) *p->ptr++;
728         return c;
729     }
730     if (!p->readable) {
731         assert(p->fp);
732         c = getc(p->fp);
733     }
734     else {
735         const char *ptr = r_string(1, p);
736         if (ptr != NULL)
737             c = *(const unsigned char *) ptr;
738     }
739     return c;
740 }
741 
742 static int
r_short(RFILE * p)743 r_short(RFILE *p)
744 {
745     short x = -1;
746     const unsigned char *buffer;
747 
748     buffer = (const unsigned char *) r_string(2, p);
749     if (buffer != NULL) {
750         x = buffer[0];
751         x |= buffer[1] << 8;
752         /* Sign-extension, in case short greater than 16 bits */
753         x |= -(x & 0x8000);
754     }
755     return x;
756 }
757 
758 static long
r_long(RFILE * p)759 r_long(RFILE *p)
760 {
761     long x = -1;
762     const unsigned char *buffer;
763 
764     buffer = (const unsigned char *) r_string(4, p);
765     if (buffer != NULL) {
766         x = buffer[0];
767         x |= (long)buffer[1] << 8;
768         x |= (long)buffer[2] << 16;
769         x |= (long)buffer[3] << 24;
770 #if SIZEOF_LONG > 4
771         /* Sign extension for 64-bit machines */
772         x |= -(x & 0x80000000L);
773 #endif
774     }
775     return x;
776 }
777 
778 /* r_long64 deals with the TYPE_INT64 code. */
779 static PyObject *
r_long64(RFILE * p)780 r_long64(RFILE *p)
781 {
782     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
783     if (buffer == NULL) {
784         return NULL;
785     }
786     return _PyLong_FromByteArray(buffer, 8,
787                                  1 /* little endian */,
788                                  1 /* signed */);
789 }
790 
791 static PyObject *
r_PyLong(RFILE * p)792 r_PyLong(RFILE *p)
793 {
794     PyLongObject *ob;
795     long n, size, i;
796     int j, md, shorts_in_top_digit;
797     digit d;
798 
799     n = r_long(p);
800     if (PyErr_Occurred())
801         return NULL;
802     if (n == 0)
803         return (PyObject *)_PyLong_New(0);
804     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
805         PyErr_SetString(PyExc_ValueError,
806                        "bad marshal data (long size out of range)");
807         return NULL;
808     }
809 
810     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
811     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
812     ob = _PyLong_New(size);
813     if (ob == NULL)
814         return NULL;
815 
816     Py_SIZE(ob) = n > 0 ? size : -size;
817 
818     for (i = 0; i < size-1; i++) {
819         d = 0;
820         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
821             md = r_short(p);
822             if (PyErr_Occurred()) {
823                 Py_DECREF(ob);
824                 return NULL;
825             }
826             if (md < 0 || md > PyLong_MARSHAL_BASE)
827                 goto bad_digit;
828             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
829         }
830         ob->ob_digit[i] = d;
831     }
832 
833     d = 0;
834     for (j=0; j < shorts_in_top_digit; j++) {
835         md = r_short(p);
836         if (PyErr_Occurred()) {
837             Py_DECREF(ob);
838             return NULL;
839         }
840         if (md < 0 || md > PyLong_MARSHAL_BASE)
841             goto bad_digit;
842         /* topmost marshal digit should be nonzero */
843         if (md == 0 && j == shorts_in_top_digit - 1) {
844             Py_DECREF(ob);
845             PyErr_SetString(PyExc_ValueError,
846                 "bad marshal data (unnormalized long data)");
847             return NULL;
848         }
849         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
850     }
851     if (PyErr_Occurred()) {
852         Py_DECREF(ob);
853         return NULL;
854     }
855     /* top digit should be nonzero, else the resulting PyLong won't be
856        normalized */
857     ob->ob_digit[size-1] = d;
858     return (PyObject *)ob;
859   bad_digit:
860     Py_DECREF(ob);
861     PyErr_SetString(PyExc_ValueError,
862                     "bad marshal data (digit out of range in long)");
863     return NULL;
864 }
865 
866 static double
r_float_bin(RFILE * p)867 r_float_bin(RFILE *p)
868 {
869     const unsigned char *buf = (const unsigned char *) r_string(8, p);
870     if (buf == NULL)
871         return -1;
872     return _PyFloat_Unpack8(buf, 1);
873 }
874 
875 /* Issue #33720: Disable inlining for reducing the C stack consumption
876    on PGO builds. */
877 _Py_NO_INLINE static double
r_float_str(RFILE * p)878 r_float_str(RFILE *p)
879 {
880     int n;
881     char buf[256];
882     const char *ptr;
883     n = r_byte(p);
884     if (n == EOF) {
885         PyErr_SetString(PyExc_EOFError,
886             "EOF read where object expected");
887         return -1;
888     }
889     ptr = r_string(n, p);
890     if (ptr == NULL) {
891         return -1;
892     }
893     memcpy(buf, ptr, n);
894     buf[n] = '\0';
895     return PyOS_string_to_double(buf, NULL, NULL);
896 }
897 
898 /* allocate the reflist index for a new object. Return -1 on failure */
899 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)900 r_ref_reserve(int flag, RFILE *p)
901 {
902     if (flag) { /* currently only FLAG_REF is defined */
903         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
904         if (idx >= 0x7ffffffe) {
905             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
906             return -1;
907         }
908         if (PyList_Append(p->refs, Py_None) < 0)
909             return -1;
910         return idx;
911     } else
912         return 0;
913 }
914 
915 /* insert the new object 'o' to the reflist at previously
916  * allocated index 'idx'.
917  * 'o' can be NULL, in which case nothing is done.
918  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
919  * if 'o' was non-NULL, and the function fails, 'o' is released and
920  * NULL returned. This simplifies error checking at the call site since
921  * a single test for NULL for the function result is enough.
922  */
923 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)924 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
925 {
926     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
927         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
928         Py_INCREF(o);
929         PyList_SET_ITEM(p->refs, idx, o);
930         Py_DECREF(tmp);
931     }
932     return o;
933 }
934 
935 /* combination of both above, used when an object can be
936  * created whenever it is seen in the file, as opposed to
937  * after having loaded its sub-objects.
938  */
939 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)940 r_ref(PyObject *o, int flag, RFILE *p)
941 {
942     assert(flag & FLAG_REF);
943     if (o == NULL)
944         return NULL;
945     if (PyList_Append(p->refs, o) < 0) {
946         Py_DECREF(o); /* release the new object */
947         return NULL;
948     }
949     return o;
950 }
951 
952 static PyObject *
r_object(RFILE * p)953 r_object(RFILE *p)
954 {
955     /* NULL is a valid return value, it does not necessarily means that
956        an exception is set. */
957     PyObject *v, *v2;
958     Py_ssize_t idx = 0;
959     long i, n;
960     int type, code = r_byte(p);
961     int flag, is_interned = 0;
962     PyObject *retval = NULL;
963 
964     if (code == EOF) {
965         PyErr_SetString(PyExc_EOFError,
966                         "EOF read where object expected");
967         return NULL;
968     }
969 
970     p->depth++;
971 
972     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
973         p->depth--;
974         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
975         return NULL;
976     }
977 
978     flag = code & FLAG_REF;
979     type = code & ~FLAG_REF;
980 
981 #define R_REF(O) do{\
982     if (flag) \
983         O = r_ref(O, flag, p);\
984 } while (0)
985 
986     switch (type) {
987 
988     case TYPE_NULL:
989         break;
990 
991     case TYPE_NONE:
992         Py_INCREF(Py_None);
993         retval = Py_None;
994         break;
995 
996     case TYPE_STOPITER:
997         Py_INCREF(PyExc_StopIteration);
998         retval = PyExc_StopIteration;
999         break;
1000 
1001     case TYPE_ELLIPSIS:
1002         Py_INCREF(Py_Ellipsis);
1003         retval = Py_Ellipsis;
1004         break;
1005 
1006     case TYPE_FALSE:
1007         Py_INCREF(Py_False);
1008         retval = Py_False;
1009         break;
1010 
1011     case TYPE_TRUE:
1012         Py_INCREF(Py_True);
1013         retval = Py_True;
1014         break;
1015 
1016     case TYPE_INT:
1017         n = r_long(p);
1018         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1019         R_REF(retval);
1020         break;
1021 
1022     case TYPE_INT64:
1023         retval = r_long64(p);
1024         R_REF(retval);
1025         break;
1026 
1027     case TYPE_LONG:
1028         retval = r_PyLong(p);
1029         R_REF(retval);
1030         break;
1031 
1032     case TYPE_FLOAT:
1033         {
1034             double x = r_float_str(p);
1035             if (x == -1.0 && PyErr_Occurred())
1036                 break;
1037             retval = PyFloat_FromDouble(x);
1038             R_REF(retval);
1039             break;
1040         }
1041 
1042     case TYPE_BINARY_FLOAT:
1043         {
1044             double x = r_float_bin(p);
1045             if (x == -1.0 && PyErr_Occurred())
1046                 break;
1047             retval = PyFloat_FromDouble(x);
1048             R_REF(retval);
1049             break;
1050         }
1051 
1052     case TYPE_COMPLEX:
1053         {
1054             Py_complex c;
1055             c.real = r_float_str(p);
1056             if (c.real == -1.0 && PyErr_Occurred())
1057                 break;
1058             c.imag = r_float_str(p);
1059             if (c.imag == -1.0 && PyErr_Occurred())
1060                 break;
1061             retval = PyComplex_FromCComplex(c);
1062             R_REF(retval);
1063             break;
1064         }
1065 
1066     case TYPE_BINARY_COMPLEX:
1067         {
1068             Py_complex c;
1069             c.real = r_float_bin(p);
1070             if (c.real == -1.0 && PyErr_Occurred())
1071                 break;
1072             c.imag = r_float_bin(p);
1073             if (c.imag == -1.0 && PyErr_Occurred())
1074                 break;
1075             retval = PyComplex_FromCComplex(c);
1076             R_REF(retval);
1077             break;
1078         }
1079 
1080     case TYPE_STRING:
1081         {
1082             const char *ptr;
1083             n = r_long(p);
1084             if (PyErr_Occurred())
1085                 break;
1086             if (n < 0 || n > SIZE32_MAX) {
1087                 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1088                 break;
1089             }
1090             v = PyBytes_FromStringAndSize((char *)NULL, n);
1091             if (v == NULL)
1092                 break;
1093             ptr = r_string(n, p);
1094             if (ptr == NULL) {
1095                 Py_DECREF(v);
1096                 break;
1097             }
1098             memcpy(PyBytes_AS_STRING(v), ptr, n);
1099             retval = v;
1100             R_REF(retval);
1101             break;
1102         }
1103 
1104     case TYPE_ASCII_INTERNED:
1105         is_interned = 1;
1106         /* fall through */
1107     case TYPE_ASCII:
1108         n = r_long(p);
1109         if (PyErr_Occurred())
1110             break;
1111         if (n < 0 || n > SIZE32_MAX) {
1112             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1113             break;
1114         }
1115         goto _read_ascii;
1116 
1117     case TYPE_SHORT_ASCII_INTERNED:
1118         is_interned = 1;
1119         /* fall through */
1120     case TYPE_SHORT_ASCII:
1121         n = r_byte(p);
1122         if (n == EOF) {
1123             PyErr_SetString(PyExc_EOFError,
1124                 "EOF read where object expected");
1125             break;
1126         }
1127     _read_ascii:
1128         {
1129             const char *ptr;
1130             ptr = r_string(n, p);
1131             if (ptr == NULL)
1132                 break;
1133             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1134             if (v == NULL)
1135                 break;
1136             if (is_interned)
1137                 PyUnicode_InternInPlace(&v);
1138             retval = v;
1139             R_REF(retval);
1140             break;
1141         }
1142 
1143     case TYPE_INTERNED:
1144         is_interned = 1;
1145         /* fall through */
1146     case TYPE_UNICODE:
1147         {
1148         const char *buffer;
1149 
1150         n = r_long(p);
1151         if (PyErr_Occurred())
1152             break;
1153         if (n < 0 || n > SIZE32_MAX) {
1154             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1155             break;
1156         }
1157         if (n != 0) {
1158             buffer = r_string(n, p);
1159             if (buffer == NULL)
1160                 break;
1161             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1162         }
1163         else {
1164             v = PyUnicode_New(0, 0);
1165         }
1166         if (v == NULL)
1167             break;
1168         if (is_interned)
1169             PyUnicode_InternInPlace(&v);
1170         retval = v;
1171         R_REF(retval);
1172         break;
1173         }
1174 
1175     case TYPE_SMALL_TUPLE:
1176         n = (unsigned char) r_byte(p);
1177         if (PyErr_Occurred())
1178             break;
1179         goto _read_tuple;
1180     case TYPE_TUPLE:
1181         n = r_long(p);
1182         if (PyErr_Occurred())
1183             break;
1184         if (n < 0 || n > SIZE32_MAX) {
1185             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1186             break;
1187         }
1188     _read_tuple:
1189         v = PyTuple_New(n);
1190         R_REF(v);
1191         if (v == NULL)
1192             break;
1193 
1194         for (i = 0; i < n; i++) {
1195             v2 = r_object(p);
1196             if ( v2 == NULL ) {
1197                 if (!PyErr_Occurred())
1198                     PyErr_SetString(PyExc_TypeError,
1199                         "NULL object in marshal data for tuple");
1200                 Py_DECREF(v);
1201                 v = NULL;
1202                 break;
1203             }
1204             PyTuple_SET_ITEM(v, i, v2);
1205         }
1206         retval = v;
1207         break;
1208 
1209     case TYPE_LIST:
1210         n = r_long(p);
1211         if (PyErr_Occurred())
1212             break;
1213         if (n < 0 || n > SIZE32_MAX) {
1214             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1215             break;
1216         }
1217         v = PyList_New(n);
1218         R_REF(v);
1219         if (v == NULL)
1220             break;
1221         for (i = 0; i < n; i++) {
1222             v2 = r_object(p);
1223             if ( v2 == NULL ) {
1224                 if (!PyErr_Occurred())
1225                     PyErr_SetString(PyExc_TypeError,
1226                         "NULL object in marshal data for list");
1227                 Py_DECREF(v);
1228                 v = NULL;
1229                 break;
1230             }
1231             PyList_SET_ITEM(v, i, v2);
1232         }
1233         retval = v;
1234         break;
1235 
1236     case TYPE_DICT:
1237         v = PyDict_New();
1238         R_REF(v);
1239         if (v == NULL)
1240             break;
1241         for (;;) {
1242             PyObject *key, *val;
1243             key = r_object(p);
1244             if (key == NULL)
1245                 break;
1246             val = r_object(p);
1247             if (val == NULL) {
1248                 Py_DECREF(key);
1249                 break;
1250             }
1251             if (PyDict_SetItem(v, key, val) < 0) {
1252                 Py_DECREF(key);
1253                 Py_DECREF(val);
1254                 break;
1255             }
1256             Py_DECREF(key);
1257             Py_DECREF(val);
1258         }
1259         if (PyErr_Occurred()) {
1260             Py_DECREF(v);
1261             v = NULL;
1262         }
1263         retval = v;
1264         break;
1265 
1266     case TYPE_SET:
1267     case TYPE_FROZENSET:
1268         n = r_long(p);
1269         if (PyErr_Occurred())
1270             break;
1271         if (n < 0 || n > SIZE32_MAX) {
1272             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1273             break;
1274         }
1275 
1276         if (n == 0 && type == TYPE_FROZENSET) {
1277             /* call frozenset() to get the empty frozenset singleton */
1278             v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1279             if (v == NULL)
1280                 break;
1281             R_REF(v);
1282             retval = v;
1283         }
1284         else {
1285             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1286             if (type == TYPE_SET) {
1287                 R_REF(v);
1288             } else {
1289                 /* must use delayed registration of frozensets because they must
1290                  * be init with a refcount of 1
1291                  */
1292                 idx = r_ref_reserve(flag, p);
1293                 if (idx < 0)
1294                     Py_CLEAR(v); /* signal error */
1295             }
1296             if (v == NULL)
1297                 break;
1298 
1299             for (i = 0; i < n; i++) {
1300                 v2 = r_object(p);
1301                 if ( v2 == NULL ) {
1302                     if (!PyErr_Occurred())
1303                         PyErr_SetString(PyExc_TypeError,
1304                             "NULL object in marshal data for set");
1305                     Py_DECREF(v);
1306                     v = NULL;
1307                     break;
1308                 }
1309                 if (PySet_Add(v, v2) == -1) {
1310                     Py_DECREF(v);
1311                     Py_DECREF(v2);
1312                     v = NULL;
1313                     break;
1314                 }
1315                 Py_DECREF(v2);
1316             }
1317             if (type != TYPE_SET)
1318                 v = r_ref_insert(v, idx, flag, p);
1319             retval = v;
1320         }
1321         break;
1322 
1323     case TYPE_CODE:
1324         {
1325             int argcount;
1326             int posonlyargcount;
1327             int kwonlyargcount;
1328             int nlocals;
1329             int stacksize;
1330             int flags;
1331             PyObject *code = NULL;
1332             PyObject *consts = NULL;
1333             PyObject *names = NULL;
1334             PyObject *varnames = NULL;
1335             PyObject *freevars = NULL;
1336             PyObject *cellvars = NULL;
1337             PyObject *filename = NULL;
1338             PyObject *name = NULL;
1339             int firstlineno;
1340             PyObject *lnotab = NULL;
1341 
1342             idx = r_ref_reserve(flag, p);
1343             if (idx < 0)
1344                 break;
1345 
1346             v = NULL;
1347 
1348             /* XXX ignore long->int overflows for now */
1349             argcount = (int)r_long(p);
1350             if (PyErr_Occurred())
1351                 goto code_error;
1352             posonlyargcount = (int)r_long(p);
1353             if (PyErr_Occurred()) {
1354                 goto code_error;
1355             }
1356             kwonlyargcount = (int)r_long(p);
1357             if (PyErr_Occurred())
1358                 goto code_error;
1359             nlocals = (int)r_long(p);
1360             if (PyErr_Occurred())
1361                 goto code_error;
1362             stacksize = (int)r_long(p);
1363             if (PyErr_Occurred())
1364                 goto code_error;
1365             flags = (int)r_long(p);
1366             if (PyErr_Occurred())
1367                 goto code_error;
1368             code = r_object(p);
1369             if (code == NULL)
1370                 goto code_error;
1371             consts = r_object(p);
1372             if (consts == NULL)
1373                 goto code_error;
1374             names = r_object(p);
1375             if (names == NULL)
1376                 goto code_error;
1377             varnames = r_object(p);
1378             if (varnames == NULL)
1379                 goto code_error;
1380             freevars = r_object(p);
1381             if (freevars == NULL)
1382                 goto code_error;
1383             cellvars = r_object(p);
1384             if (cellvars == NULL)
1385                 goto code_error;
1386             filename = r_object(p);
1387             if (filename == NULL)
1388                 goto code_error;
1389             name = r_object(p);
1390             if (name == NULL)
1391                 goto code_error;
1392             firstlineno = (int)r_long(p);
1393             if (firstlineno == -1 && PyErr_Occurred())
1394                 break;
1395             lnotab = r_object(p);
1396             if (lnotab == NULL)
1397                 goto code_error;
1398 
1399             if (PySys_Audit("code.__new__", "OOOiiiiii",
1400                             code, filename, name, argcount, posonlyargcount,
1401                             kwonlyargcount, nlocals, stacksize, flags) < 0) {
1402                 goto code_error;
1403             }
1404 
1405             v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1406                             argcount, posonlyargcount, kwonlyargcount,
1407                             nlocals, stacksize, flags,
1408                             code, consts, names, varnames,
1409                             freevars, cellvars, filename, name,
1410                             firstlineno, lnotab);
1411             v = r_ref_insert(v, idx, flag, p);
1412 
1413           code_error:
1414             Py_XDECREF(code);
1415             Py_XDECREF(consts);
1416             Py_XDECREF(names);
1417             Py_XDECREF(varnames);
1418             Py_XDECREF(freevars);
1419             Py_XDECREF(cellvars);
1420             Py_XDECREF(filename);
1421             Py_XDECREF(name);
1422             Py_XDECREF(lnotab);
1423         }
1424         retval = v;
1425         break;
1426 
1427     case TYPE_REF:
1428         n = r_long(p);
1429         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1430             if (n == -1 && PyErr_Occurred())
1431                 break;
1432             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1433             break;
1434         }
1435         v = PyList_GET_ITEM(p->refs, n);
1436         if (v == Py_None) {
1437             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1438             break;
1439         }
1440         Py_INCREF(v);
1441         retval = v;
1442         break;
1443 
1444     default:
1445         /* Bogus data got written, which isn't ideal.
1446            This will let you keep working and recover. */
1447         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1448         break;
1449 
1450     }
1451     p->depth--;
1452     return retval;
1453 }
1454 
1455 static PyObject *
read_object(RFILE * p)1456 read_object(RFILE *p)
1457 {
1458     PyObject *v;
1459     if (PyErr_Occurred()) {
1460         fprintf(stderr, "XXX readobject called with exception set\n");
1461         return NULL;
1462     }
1463     v = r_object(p);
1464     if (v == NULL && !PyErr_Occurred())
1465         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1466     return v;
1467 }
1468 
1469 int
PyMarshal_ReadShortFromFile(FILE * fp)1470 PyMarshal_ReadShortFromFile(FILE *fp)
1471 {
1472     RFILE rf;
1473     int res;
1474     assert(fp);
1475     rf.readable = NULL;
1476     rf.fp = fp;
1477     rf.end = rf.ptr = NULL;
1478     rf.buf = NULL;
1479     res = r_short(&rf);
1480     if (rf.buf != NULL)
1481         PyMem_FREE(rf.buf);
1482     return res;
1483 }
1484 
1485 long
PyMarshal_ReadLongFromFile(FILE * fp)1486 PyMarshal_ReadLongFromFile(FILE *fp)
1487 {
1488     RFILE rf;
1489     long res;
1490     rf.fp = fp;
1491     rf.readable = NULL;
1492     rf.ptr = rf.end = NULL;
1493     rf.buf = NULL;
1494     res = r_long(&rf);
1495     if (rf.buf != NULL)
1496         PyMem_FREE(rf.buf);
1497     return res;
1498 }
1499 
1500 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1501 static off_t
getfilesize(FILE * fp)1502 getfilesize(FILE *fp)
1503 {
1504     struct _Py_stat_struct st;
1505     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1506         return -1;
1507 #if SIZEOF_OFF_T == 4
1508     else if (st.st_size >= INT_MAX)
1509         return (off_t)INT_MAX;
1510 #endif
1511     else
1512         return (off_t)st.st_size;
1513 }
1514 
1515 /* If we can get the size of the file up-front, and it's reasonably small,
1516  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1517  * than reading a byte at a time from file; speeds .pyc imports.
1518  * CAUTION:  since this may read the entire remainder of the file, don't
1519  * call it unless you know you're done with the file.
1520  */
1521 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1522 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1523 {
1524 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1525 #define REASONABLE_FILE_LIMIT (1L << 18)
1526     off_t filesize;
1527     filesize = getfilesize(fp);
1528     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1529         char* pBuf = (char *)PyMem_MALLOC(filesize);
1530         if (pBuf != NULL) {
1531             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1532             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1533             PyMem_FREE(pBuf);
1534             return v;
1535         }
1536 
1537     }
1538     /* We don't have fstat, or we do but the file is larger than
1539      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1540      */
1541     return PyMarshal_ReadObjectFromFile(fp);
1542 
1543 #undef REASONABLE_FILE_LIMIT
1544 }
1545 
1546 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1547 PyMarshal_ReadObjectFromFile(FILE *fp)
1548 {
1549     RFILE rf;
1550     PyObject *result;
1551     rf.fp = fp;
1552     rf.readable = NULL;
1553     rf.depth = 0;
1554     rf.ptr = rf.end = NULL;
1555     rf.buf = NULL;
1556     rf.refs = PyList_New(0);
1557     if (rf.refs == NULL)
1558         return NULL;
1559     result = r_object(&rf);
1560     Py_DECREF(rf.refs);
1561     if (rf.buf != NULL)
1562         PyMem_FREE(rf.buf);
1563     return result;
1564 }
1565 
1566 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1567 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1568 {
1569     RFILE rf;
1570     PyObject *result;
1571     rf.fp = NULL;
1572     rf.readable = NULL;
1573     rf.ptr = (char *)str;
1574     rf.end = (char *)str + len;
1575     rf.buf = NULL;
1576     rf.depth = 0;
1577     rf.refs = PyList_New(0);
1578     if (rf.refs == NULL)
1579         return NULL;
1580     result = r_object(&rf);
1581     Py_DECREF(rf.refs);
1582     if (rf.buf != NULL)
1583         PyMem_FREE(rf.buf);
1584     return result;
1585 }
1586 
1587 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1588 PyMarshal_WriteObjectToString(PyObject *x, int version)
1589 {
1590     WFILE wf;
1591 
1592     memset(&wf, 0, sizeof(wf));
1593     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1594     if (wf.str == NULL)
1595         return NULL;
1596     wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1597     wf.end = wf.ptr + PyBytes_Size(wf.str);
1598     wf.error = WFERR_OK;
1599     wf.version = version;
1600     if (w_init_refs(&wf, version)) {
1601         Py_DECREF(wf.str);
1602         return NULL;
1603     }
1604     w_object(x, &wf);
1605     w_clear_refs(&wf);
1606     if (wf.str != NULL) {
1607         char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1608         if (wf.ptr - base > PY_SSIZE_T_MAX) {
1609             Py_DECREF(wf.str);
1610             PyErr_SetString(PyExc_OverflowError,
1611                             "too much marshal data for a bytes object");
1612             return NULL;
1613         }
1614         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1615             return NULL;
1616     }
1617     if (wf.error != WFERR_OK) {
1618         Py_XDECREF(wf.str);
1619         if (wf.error == WFERR_NOMEMORY)
1620             PyErr_NoMemory();
1621         else
1622             PyErr_SetString(PyExc_ValueError,
1623               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1624                :"object too deeply nested to marshal");
1625         return NULL;
1626     }
1627     return wf.str;
1628 }
1629 
1630 /* And an interface for Python programs... */
1631 /*[clinic input]
1632 marshal.dump
1633 
1634     value: object
1635         Must be a supported type.
1636     file: object
1637         Must be a writeable binary file.
1638     version: int(c_default="Py_MARSHAL_VERSION") = version
1639         Indicates the data format that dump should use.
1640     /
1641 
1642 Write the value on the open file.
1643 
1644 If the value has (or contains an object that has) an unsupported type, a
1645 ValueError exception is raised - but garbage data will also be written
1646 to the file. The object will not be properly read back by load().
1647 [clinic start generated code]*/
1648 
1649 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1650 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1651                   int version)
1652 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1653 {
1654     /* XXX Quick hack -- need to do this differently */
1655     PyObject *s;
1656     PyObject *res;
1657     _Py_IDENTIFIER(write);
1658 
1659     s = PyMarshal_WriteObjectToString(value, version);
1660     if (s == NULL)
1661         return NULL;
1662     res = _PyObject_CallMethodIdObjArgs(file, &PyId_write, s, NULL);
1663     Py_DECREF(s);
1664     return res;
1665 }
1666 
1667 /*[clinic input]
1668 marshal.load
1669 
1670     file: object
1671         Must be readable binary file.
1672     /
1673 
1674 Read one value from the open file and return it.
1675 
1676 If no valid value is read (e.g. because the data has a different Python
1677 version's incompatible marshal format), raise EOFError, ValueError or
1678 TypeError.
1679 
1680 Note: If an object containing an unsupported type was marshalled with
1681 dump(), load() will substitute None for the unmarshallable type.
1682 [clinic start generated code]*/
1683 
1684 static PyObject *
marshal_load(PyObject * module,PyObject * file)1685 marshal_load(PyObject *module, PyObject *file)
1686 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1687 {
1688     PyObject *data, *result;
1689     _Py_IDENTIFIER(read);
1690     RFILE rf;
1691 
1692     /*
1693      * Make a call to the read method, but read zero bytes.
1694      * This is to ensure that the object passed in at least
1695      * has a read method which returns bytes.
1696      * This can be removed if we guarantee good error handling
1697      * for r_string()
1698      */
1699     data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1700     if (data == NULL)
1701         return NULL;
1702     if (!PyBytes_Check(data)) {
1703         PyErr_Format(PyExc_TypeError,
1704                      "file.read() returned not bytes but %.100s",
1705                      data->ob_type->tp_name);
1706         result = NULL;
1707     }
1708     else {
1709         rf.depth = 0;
1710         rf.fp = NULL;
1711         rf.readable = file;
1712         rf.ptr = rf.end = NULL;
1713         rf.buf = NULL;
1714         if ((rf.refs = PyList_New(0)) != NULL) {
1715             result = read_object(&rf);
1716             Py_DECREF(rf.refs);
1717             if (rf.buf != NULL)
1718                 PyMem_FREE(rf.buf);
1719         } else
1720             result = NULL;
1721     }
1722     Py_DECREF(data);
1723     return result;
1724 }
1725 
1726 /*[clinic input]
1727 marshal.dumps
1728 
1729     value: object
1730         Must be a supported type.
1731     version: int(c_default="Py_MARSHAL_VERSION") = version
1732         Indicates the data format that dumps should use.
1733     /
1734 
1735 Return the bytes object that would be written to a file by dump(value, file).
1736 
1737 Raise a ValueError exception if value has (or contains an object that has) an
1738 unsupported type.
1739 [clinic start generated code]*/
1740 
1741 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1742 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1743 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1744 {
1745     return PyMarshal_WriteObjectToString(value, version);
1746 }
1747 
1748 /*[clinic input]
1749 marshal.loads
1750 
1751     bytes: Py_buffer
1752     /
1753 
1754 Convert the bytes-like object to a value.
1755 
1756 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1757 bytes in the input are ignored.
1758 [clinic start generated code]*/
1759 
1760 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1761 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1762 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1763 {
1764     RFILE rf;
1765     char *s = bytes->buf;
1766     Py_ssize_t n = bytes->len;
1767     PyObject* result;
1768     rf.fp = NULL;
1769     rf.readable = NULL;
1770     rf.ptr = s;
1771     rf.end = s + n;
1772     rf.depth = 0;
1773     if ((rf.refs = PyList_New(0)) == NULL)
1774         return NULL;
1775     result = read_object(&rf);
1776     Py_DECREF(rf.refs);
1777     return result;
1778 }
1779 
1780 static PyMethodDef marshal_methods[] = {
1781     MARSHAL_DUMP_METHODDEF
1782     MARSHAL_LOAD_METHODDEF
1783     MARSHAL_DUMPS_METHODDEF
1784     MARSHAL_LOADS_METHODDEF
1785     {NULL,              NULL}           /* sentinel */
1786 };
1787 
1788 
1789 PyDoc_STRVAR(module_doc,
1790 "This module contains functions that can read and write Python values in\n\
1791 a binary format. The format is specific to Python, but independent of\n\
1792 machine architecture issues.\n\
1793 \n\
1794 Not all Python object types are supported; in general, only objects\n\
1795 whose value is independent from a particular invocation of Python can be\n\
1796 written and read by this module. The following types are supported:\n\
1797 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1798 tuples, lists, sets, dictionaries, and code objects, where it\n\
1799 should be understood that tuples, lists and dictionaries are only\n\
1800 supported as long as the values contained therein are themselves\n\
1801 supported; and recursive lists and dictionaries should not be written\n\
1802 (they will cause infinite loops).\n\
1803 \n\
1804 Variables:\n\
1805 \n\
1806 version -- indicates the format that the module uses. Version 0 is the\n\
1807     historical format, version 1 shares interned strings and version 2\n\
1808     uses a binary format for floating point numbers.\n\
1809     Version 3 shares common object references (New in version 3.4).\n\
1810 \n\
1811 Functions:\n\
1812 \n\
1813 dump() -- write value to a file\n\
1814 load() -- read value from a file\n\
1815 dumps() -- marshal value as a bytes object\n\
1816 loads() -- read value from a bytes-like object");
1817 
1818 
1819 
1820 static struct PyModuleDef marshalmodule = {
1821     PyModuleDef_HEAD_INIT,
1822     "marshal",
1823     module_doc,
1824     0,
1825     marshal_methods,
1826     NULL,
1827     NULL,
1828     NULL,
1829     NULL
1830 };
1831 
1832 PyMODINIT_FUNC
PyMarshal_Init(void)1833 PyMarshal_Init(void)
1834 {
1835     PyObject *mod = PyModule_Create(&marshalmodule);
1836     if (mod == NULL)
1837         return NULL;
1838     if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1839         Py_DECREF(mod);
1840         return NULL;
1841     }
1842     return mod;
1843 }
1844