1 
2 /* Write Python objects to files and read them back.
3    This is primarily intended for writing and reading compiled Python code,
4    even though dicts, lists, sets and frozensets, not commonly seen in
5    code objects, are supported.
6    Version 3 of this protocol properly supports circular links
7    and sharing. */
8 
9 #define PY_SSIZE_T_CLEAN
10 
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "pycore_hashtable.h"
16 
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21 
22 #include "clinic/marshal.c.h"
23 
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks crashing the interpreter.  When the object nesting gets this
 * deep, an error is reported instead of recursing any further.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow. We reduce the maximum depth for all Windows
 * releases to protect against this.  The narrower, debug-only condition is
 * kept here for reference:
 *     #if defined(MS_WINDOWS) && defined(_DEBUG)
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
40 
/* One-byte type codes.  The writer emits one of these (possibly OR-ed with
   FLAG_REF, see W_TYPE) as the first byte of an object; the reader masks
   FLAG_REF off again and dispatches on the remaining code. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
/* Written by w_ref(), followed by the index of an already written object. */
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact codes; the writer only emits these when version >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'
74 
/* Values stored in WFILE.error by the writer functions.
   WFERR_OK is the initial "no error recorded" state. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
79 
/* State of one marshal write operation.  Output goes either to a FILE
   (fp != NULL, through a fixed buffer) or into a growing bytes object
   (fp == NULL, str != NULL). */
typedef struct {
    FILE *fp;   /* destination stream, or NULL when writing into str */
    int error;  /* see WFERR_* values */
    int depth;  /* current w_object() nesting level */
    PyObject *str;  /* bytes object being filled when fp is NULL */
    char *ptr;  /* next write position in buf; NULL after a failed resize */
    const char *end;  /* one past the last usable byte of buf */
    char *buf;  /* start of the current output buffer */
    _Py_hashtable_t *hashtable;  /* object -> reference index (version >= 3) */
    int version;  /* marshal format version being written */
} WFILE;
91 
/* Append the single byte c to writer p.  When the buffer is full,
   w_reserve() is asked for one more byte of room; if that fails the byte is
   dropped.  Note that p is evaluated more than once. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
96 
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100     assert(p->fp != NULL);
101     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102     p->ptr = p->buf;
103 }
104 
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108     Py_ssize_t pos, size, delta;
109     if (p->ptr == NULL)
110         return 0; /* An error already occurred */
111     if (p->fp != NULL) {
112         w_flush(p);
113         return needed <= p->end - p->ptr;
114     }
115     assert(p->str != NULL);
116     pos = p->ptr - p->buf;
117     size = PyBytes_GET_SIZE(p->str);
118     if (size > 16*1024*1024)
119         delta = (size >> 3);            /* 12.5% overallocation */
120     else
121         delta = size + 1024;
122     delta = Py_MAX(delta, needed);
123     if (delta > PY_SSIZE_T_MAX - size) {
124         p->error = WFERR_NOMEMORY;
125         return 0;
126     }
127     size += delta;
128     if (_PyBytes_Resize(&p->str, size) != 0) {
129         p->end = p->ptr = p->buf = NULL;
130         return 0;
131     }
132     else {
133         p->buf = PyBytes_AS_STRING(p->str);
134         p->ptr = p->buf + pos;
135         p->end = p->buf + size;
136         return 1;
137     }
138 }
139 
140 static void
w_string(const void * s,Py_ssize_t n,WFILE * p)141 w_string(const void *s, Py_ssize_t n, WFILE *p)
142 {
143     Py_ssize_t m;
144     if (!n || p->ptr == NULL)
145         return;
146     m = p->end - p->ptr;
147     if (p->fp != NULL) {
148         if (n <= m) {
149             memcpy(p->ptr, s, n);
150             p->ptr += n;
151         }
152         else {
153             w_flush(p);
154             fwrite(s, 1, n, p->fp);
155         }
156     }
157     else {
158         if (n <= m || w_reserve(p, n - m)) {
159             memcpy(p->ptr, s, n);
160             p->ptr += n;
161         }
162     }
163 }
164 
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168     w_byte((char)( x      & 0xff), p);
169     w_byte((char)((x>> 8) & 0xff), p);
170 }
171 
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175     w_byte((char)( x      & 0xff), p);
176     w_byte((char)((x>> 8) & 0xff), p);
177     w_byte((char)((x>>16) & 0xff), p);
178     w_byte((char)((x>>24) & 0xff), p);
179 }
180 
/* Largest size that fits the 4-byte signed length field. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size n with w_long().
   Where size_t is wider than 4 bytes, a size above SIZE32_MAX cannot be
   written: the macro then decrements p->depth, records
   WFERR_UNMARSHALLABLE and RETURNS from the enclosing void function.
   Elsewhere it is a plain alias for w_long. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
195 
196 static void
w_pstring(const void * s,Py_ssize_t n,WFILE * p)197 w_pstring(const void *s, Py_ssize_t n, WFILE *p)
198 {
199         W_SIZE(n, p);
200         w_string(s, n, p);
201 }
202 
203 static void
w_short_pstring(const void * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
205 {
206     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207     w_string(s, n, p);
208 }
209 
210 /* We assume that Python ints are stored internally in base some power of
211    2**15; for the sake of portability we'll always read and write them in base
212    exactly 2**15. */
213 
/* Number of bits carried by one marshalled digit. */
#define PyLong_MARSHAL_SHIFT 15
/* 2**PyLong_MARSHAL_SHIFT, the base in which digits are marshalled. */
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
/* Mask selecting the low PyLong_MARSHAL_SHIFT bits of a value. */
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of marshalled digits that make up one internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
221 
/* Write type code t OR-ed with `flag`.  `flag` is not a macro parameter: a
   variable of that name must be in scope wherever W_TYPE is used. */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
225 
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229     Py_ssize_t i, j, n, l;
230     digit d;
231 
232     W_TYPE(TYPE_LONG, p);
233     if (Py_SIZE(ob) == 0) {
234         w_long((long)0, p);
235         return;
236     }
237 
238     /* set l to number of base PyLong_MARSHAL_BASE digits */
239     n = Py_ABS(Py_SIZE(ob));
240     l = (n-1) * PyLong_MARSHAL_RATIO;
241     d = ob->ob_digit[n-1];
242     assert(d != 0); /* a PyLong is always normalized */
243     do {
244         d >>= PyLong_MARSHAL_SHIFT;
245         l++;
246     } while (d != 0);
247     if (l > SIZE32_MAX) {
248         p->depth--;
249         p->error = WFERR_UNMARSHALLABLE;
250         return;
251     }
252     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253 
254     for (i=0; i < n-1; i++) {
255         d = ob->ob_digit[i];
256         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257             w_short(d & PyLong_MARSHAL_MASK, p);
258             d >>= PyLong_MARSHAL_SHIFT;
259         }
260         assert (d == 0);
261     }
262     d = ob->ob_digit[n-1];
263     do {
264         w_short(d & PyLong_MARSHAL_MASK, p);
265         d >>= PyLong_MARSHAL_SHIFT;
266     } while (d != 0);
267 }
268 
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272     unsigned char buf[8];
273     if (_PyFloat_Pack8(v, buf, 1) < 0) {
274         p->error = WFERR_UNMARSHALLABLE;
275         return;
276     }
277     w_string(buf, 8, p);
278 }
279 
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283     char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
284     if (!buf) {
285         p->error = WFERR_NOMEMORY;
286         return;
287     }
288     w_short_pstring(buf, strlen(buf), p);
289     PyMem_Free(buf);
290 }
291 
292 static int
w_ref(PyObject * v,char * flag,WFILE * p)293 w_ref(PyObject *v, char *flag, WFILE *p)
294 {
295     _Py_hashtable_entry_t *entry;
296     int w;
297 
298     if (p->version < 3 || p->hashtable == NULL)
299         return 0; /* not writing object references */
300 
301     /* if it has only one reference, it definitely isn't shared */
302     if (Py_REFCNT(v) == 1)
303         return 0;
304 
305     entry = _Py_hashtable_get_entry(p->hashtable, v);
306     if (entry != NULL) {
307         /* write the reference index to the stream */
308         w = (int)(uintptr_t)entry->value;
309         /* we don't store "long" indices in the dict */
310         assert(0 <= w && w <= 0x7fffffff);
311         w_byte(TYPE_REF, p);
312         w_long(w, p);
313         return 1;
314     } else {
315         size_t s = p->hashtable->nentries;
316         /* we don't support long indices */
317         if (s >= 0x7fffffff) {
318             PyErr_SetString(PyExc_ValueError, "too many objects");
319             goto err;
320         }
321         w = (int)s;
322         Py_INCREF(v);
323         if (_Py_hashtable_set(p->hashtable, v, (void *)(uintptr_t)w) < 0) {
324             Py_DECREF(v);
325             goto err;
326         }
327         *flag |= FLAG_REF;
328         return 0;
329     }
330 err:
331     p->error = WFERR_UNMARSHALLABLE;
332     return 1;
333 }
334 
335 static void
336 w_complex_object(PyObject *v, char flag, WFILE *p);
337 
338 static void
w_object(PyObject * v,WFILE * p)339 w_object(PyObject *v, WFILE *p)
340 {
341     char flag = '\0';
342 
343     p->depth++;
344 
345     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
346         p->error = WFERR_NESTEDTOODEEP;
347     }
348     else if (v == NULL) {
349         w_byte(TYPE_NULL, p);
350     }
351     else if (v == Py_None) {
352         w_byte(TYPE_NONE, p);
353     }
354     else if (v == PyExc_StopIteration) {
355         w_byte(TYPE_STOPITER, p);
356     }
357     else if (v == Py_Ellipsis) {
358         w_byte(TYPE_ELLIPSIS, p);
359     }
360     else if (v == Py_False) {
361         w_byte(TYPE_FALSE, p);
362     }
363     else if (v == Py_True) {
364         w_byte(TYPE_TRUE, p);
365     }
366     else if (!w_ref(v, &flag, p))
367         w_complex_object(v, flag, p);
368 
369     p->depth--;
370 }
371 
/* Serialize an object that w_object() did not handle itself, i.e. one that
   is neither a singleton nor was emitted as a back-reference.

   v    - the object to write
   flag - '\0' or FLAG_REF (as set by w_ref()); W_TYPE ORs it into the
          type byte
   p    - the writer state

   Dispatches on the exact type of v.  Objects of unsupported type are
   written as TYPE_UNKNOWN and p->error is set to WFERR_UNMARSHALLABLE.
   Where W_SIZE is the checking variant, it can return from this function
   early when a container size does not fit in SIZE32_MAX. */
static void
w_complex_object(PyObject *v, char flag, WFILE *p)
{
    Py_ssize_t i, n;

    if (PyLong_CheckExact(v)) {
        /* Ints that fit in 32 bits are written as TYPE_INT, all others
           digit by digit as TYPE_LONG. */
        int overflow;
        long x = PyLong_AsLongAndOverflow(v, &overflow);
        if (overflow) {
            w_PyLong((PyLongObject *)v, flag, p);
        }
        else {
#if SIZEOF_LONG > 4
            /* y is 0 or -1 exactly when x fits in 32 bits (sign included) */
            long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
            if (y && y != -1) {
                /* Too large for TYPE_INT */
                w_PyLong((PyLongObject*)v, flag, p);
            }
            else
#endif
            {
                W_TYPE(TYPE_INT, p);
                w_long(x, p);
            }
        }
    }
    else if (PyFloat_CheckExact(v)) {
        /* binary representation for version > 1, text otherwise */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_FLOAT, p);
            w_float_bin(PyFloat_AS_DOUBLE(v), p);
        }
        else {
            W_TYPE(TYPE_FLOAT, p);
            w_float_str(PyFloat_AS_DOUBLE(v), p);
        }
    }
    else if (PyComplex_CheckExact(v)) {
        /* real part first, then imaginary part; same version rule as floats */
        if (p->version > 1) {
            W_TYPE(TYPE_BINARY_COMPLEX, p);
            w_float_bin(PyComplex_RealAsDouble(v), p);
            w_float_bin(PyComplex_ImagAsDouble(v), p);
        }
        else {
            W_TYPE(TYPE_COMPLEX, p);
            w_float_str(PyComplex_RealAsDouble(v), p);
            w_float_str(PyComplex_ImagAsDouble(v), p);
        }
    }
    else if (PyBytes_CheckExact(v)) {
        W_TYPE(TYPE_STRING, p);
        w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
    }
    else if (PyUnicode_CheckExact(v)) {
        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
            /* ASCII strings are written raw; those shorter than 256
               characters use a one-byte length, the rest a 4-byte size. */
            int is_short = PyUnicode_GET_LENGTH(v) < 256;
            if (is_short) {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_SHORT_ASCII, p);
                w_short_pstring(PyUnicode_1BYTE_DATA(v),
                                PyUnicode_GET_LENGTH(v), p);
            }
            else {
                if (PyUnicode_CHECK_INTERNED(v))
                    W_TYPE(TYPE_ASCII_INTERNED, p);
                else
                    W_TYPE(TYPE_ASCII, p);
                w_pstring(PyUnicode_1BYTE_DATA(v),
                          PyUnicode_GET_LENGTH(v), p);
            }
        }
        else {
            /* Everything else is encoded as UTF-8 with "surrogatepass". */
            PyObject *utf8;
            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
            if (utf8 == NULL) {
                p->depth--;
                p->error = WFERR_UNMARSHALLABLE;
                return;
            }
            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
                W_TYPE(TYPE_INTERNED, p);
            else
                W_TYPE(TYPE_UNICODE, p);
            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
            Py_DECREF(utf8);
        }
    }
    else if (PyTuple_CheckExact(v)) {
        /* version >= 4 stores tuples of fewer than 256 items with a
           one-byte length */
        n = PyTuple_GET_SIZE(v);
        if (p->version >= 4 && n < 256) {
            W_TYPE(TYPE_SMALL_TUPLE, p);
            w_byte((unsigned char)n, p);
        }
        else {
            W_TYPE(TYPE_TUPLE, p);
            W_SIZE(n, p);
        }
        for (i = 0; i < n; i++) {
            w_object(PyTuple_GET_ITEM(v, i), p);
        }
    }
    else if (PyList_CheckExact(v)) {
        W_TYPE(TYPE_LIST, p);
        n = PyList_GET_SIZE(v);
        W_SIZE(n, p);
        for (i = 0; i < n; i++) {
            w_object(PyList_GET_ITEM(v, i), p);
        }
    }
    else if (PyDict_CheckExact(v)) {
        Py_ssize_t pos;
        PyObject *key, *value;
        W_TYPE(TYPE_DICT, p);
        /* This one is NULL object terminated! */
        pos = 0;
        while (PyDict_Next(v, &pos, &key, &value)) {
            w_object(key, p);
            w_object(value, p);
        }
        /* w_object(NULL) emits TYPE_NULL, which ends the key/value list */
        w_object((PyObject *)NULL, p);
    }
    else if (PyAnySet_CheckExact(v)) {
        PyObject *value;
        Py_ssize_t pos = 0;
        Py_hash_t hash;

        if (PyFrozenSet_CheckExact(v))
            W_TYPE(TYPE_FROZENSET, p);
        else
            W_TYPE(TYPE_SET, p);
        n = PySet_GET_SIZE(v);
        W_SIZE(n, p);
        /* elements follow in the set's own iteration order */
        while (_PySet_NextEntry(v, &pos, &value, &hash)) {
            w_object(value, p);
        }
    }
    else if (PyCode_Check(v)) {
        /* Code object: the fields are written in exactly this order. */
        PyCodeObject *co = (PyCodeObject *)v;
        W_TYPE(TYPE_CODE, p);
        w_long(co->co_argcount, p);
        w_long(co->co_posonlyargcount, p);
        w_long(co->co_kwonlyargcount, p);
        w_long(co->co_nlocals, p);
        w_long(co->co_stacksize, p);
        w_long(co->co_flags, p);
        w_object(co->co_code, p);
        w_object(co->co_consts, p);
        w_object(co->co_names, p);
        w_object(co->co_varnames, p);
        w_object(co->co_freevars, p);
        w_object(co->co_cellvars, p);
        w_object(co->co_filename, p);
        w_object(co->co_name, p);
        w_long(co->co_firstlineno, p);
        w_object(co->co_linetable, p);
    }
    else if (PyObject_CheckBuffer(v)) {
        /* Write unknown bytes-like objects as a bytes object */
        Py_buffer view;
        if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
            w_byte(TYPE_UNKNOWN, p);
            p->depth--;
            p->error = WFERR_UNMARSHALLABLE;
            return;
        }
        W_TYPE(TYPE_STRING, p);
        w_pstring(view.buf, view.len, p);
        PyBuffer_Release(&view);
    }
    else {
        /* no marshal representation for this type */
        W_TYPE(TYPE_UNKNOWN, p);
        p->error = WFERR_UNMARSHALLABLE;
    }
}
547 
548 static void
w_decref_entry(void * key)549 w_decref_entry(void *key)
550 {
551     PyObject *entry_key = (PyObject *)key;
552     Py_XDECREF(entry_key);
553 }
554 
555 static int
w_init_refs(WFILE * wf,int version)556 w_init_refs(WFILE *wf, int version)
557 {
558     if (version >= 3) {
559         wf->hashtable = _Py_hashtable_new_full(_Py_hashtable_hash_ptr,
560                                                _Py_hashtable_compare_direct,
561                                                w_decref_entry, NULL, NULL);
562         if (wf->hashtable == NULL) {
563             PyErr_NoMemory();
564             return -1;
565         }
566     }
567     return 0;
568 }
569 
570 static void
w_clear_refs(WFILE * wf)571 w_clear_refs(WFILE *wf)
572 {
573     if (wf->hashtable != NULL) {
574         _Py_hashtable_destroy(wf->hashtable);
575     }
576 }
577 
578 /* version currently has no effect for writing ints. */
579 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)580 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
581 {
582     char buf[4];
583     WFILE wf;
584     memset(&wf, 0, sizeof(wf));
585     wf.fp = fp;
586     wf.ptr = wf.buf = buf;
587     wf.end = wf.ptr + sizeof(buf);
588     wf.error = WFERR_OK;
589     wf.version = version;
590     w_long(x, &wf);
591     w_flush(&wf);
592 }
593 
594 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)595 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
596 {
597     char buf[BUFSIZ];
598     WFILE wf;
599     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
600         return; /* caller must check PyErr_Occurred() */
601     }
602     memset(&wf, 0, sizeof(wf));
603     wf.fp = fp;
604     wf.ptr = wf.buf = buf;
605     wf.end = wf.ptr + sizeof(buf);
606     wf.error = WFERR_OK;
607     wf.version = version;
608     if (w_init_refs(&wf, version)) {
609         return; /* caller must check PyErr_Occurred() */
610     }
611     w_object(x, &wf);
612     w_clear_refs(&wf);
613     w_flush(&wf);
614 }
615 
/* State of one marshal read operation.  Data comes from an in-memory
   range (ptr != NULL), from a stream-like object (readable != NULL) or
   from a FILE (fp). */
typedef struct {
    FILE *fp;  /* source stream, used when ptr and readable are both NULL */
    int depth;  /* current r_object() nesting level */
    PyObject *readable;  /* Stream-like object being read from */
    const char *ptr;  /* next unread byte of in-memory data, or NULL */
    const char *end;  /* one past the last byte of in-memory data */
    char *buf;  /* scratch buffer r_string() allocates for fp/readable reads */
    Py_ssize_t buf_size;  /* size of buf in bytes */
    PyObject *refs;  /* a list */
} RFILE;
626 
627 static const char *
r_string(Py_ssize_t n,RFILE * p)628 r_string(Py_ssize_t n, RFILE *p)
629 {
630     Py_ssize_t read = -1;
631 
632     if (p->ptr != NULL) {
633         /* Fast path for loads() */
634         const char *res = p->ptr;
635         Py_ssize_t left = p->end - p->ptr;
636         if (left < n) {
637             PyErr_SetString(PyExc_EOFError,
638                             "marshal data too short");
639             return NULL;
640         }
641         p->ptr += n;
642         return res;
643     }
644     if (p->buf == NULL) {
645         p->buf = PyMem_Malloc(n);
646         if (p->buf == NULL) {
647             PyErr_NoMemory();
648             return NULL;
649         }
650         p->buf_size = n;
651     }
652     else if (p->buf_size < n) {
653         char *tmp = PyMem_Realloc(p->buf, n);
654         if (tmp == NULL) {
655             PyErr_NoMemory();
656             return NULL;
657         }
658         p->buf = tmp;
659         p->buf_size = n;
660     }
661 
662     if (!p->readable) {
663         assert(p->fp != NULL);
664         read = fread(p->buf, 1, n, p->fp);
665     }
666     else {
667         _Py_IDENTIFIER(readinto);
668         PyObject *res, *mview;
669         Py_buffer buf;
670 
671         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
672             return NULL;
673         mview = PyMemoryView_FromBuffer(&buf);
674         if (mview == NULL)
675             return NULL;
676 
677         res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
678         if (res != NULL) {
679             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
680             Py_DECREF(res);
681         }
682     }
683     if (read != n) {
684         if (!PyErr_Occurred()) {
685             if (read > n)
686                 PyErr_Format(PyExc_ValueError,
687                              "read() returned too much data: "
688                              "%zd bytes requested, %zd returned",
689                              n, read);
690             else
691                 PyErr_SetString(PyExc_EOFError,
692                                 "EOF read where not expected");
693         }
694         return NULL;
695     }
696     return p->buf;
697 }
698 
699 static int
r_byte(RFILE * p)700 r_byte(RFILE *p)
701 {
702     int c = EOF;
703 
704     if (p->ptr != NULL) {
705         if (p->ptr < p->end)
706             c = (unsigned char) *p->ptr++;
707         return c;
708     }
709     if (!p->readable) {
710         assert(p->fp);
711         c = getc(p->fp);
712     }
713     else {
714         const char *ptr = r_string(1, p);
715         if (ptr != NULL)
716             c = *(const unsigned char *) ptr;
717     }
718     return c;
719 }
720 
721 static int
r_short(RFILE * p)722 r_short(RFILE *p)
723 {
724     short x = -1;
725     const unsigned char *buffer;
726 
727     buffer = (const unsigned char *) r_string(2, p);
728     if (buffer != NULL) {
729         x = buffer[0];
730         x |= buffer[1] << 8;
731         /* Sign-extension, in case short greater than 16 bits */
732         x |= -(x & 0x8000);
733     }
734     return x;
735 }
736 
737 static long
r_long(RFILE * p)738 r_long(RFILE *p)
739 {
740     long x = -1;
741     const unsigned char *buffer;
742 
743     buffer = (const unsigned char *) r_string(4, p);
744     if (buffer != NULL) {
745         x = buffer[0];
746         x |= (long)buffer[1] << 8;
747         x |= (long)buffer[2] << 16;
748         x |= (long)buffer[3] << 24;
749 #if SIZEOF_LONG > 4
750         /* Sign extension for 64-bit machines */
751         x |= -(x & 0x80000000L);
752 #endif
753     }
754     return x;
755 }
756 
757 /* r_long64 deals with the TYPE_INT64 code. */
758 static PyObject *
r_long64(RFILE * p)759 r_long64(RFILE *p)
760 {
761     const unsigned char *buffer = (const unsigned char *) r_string(8, p);
762     if (buffer == NULL) {
763         return NULL;
764     }
765     return _PyLong_FromByteArray(buffer, 8,
766                                  1 /* little endian */,
767                                  1 /* signed */);
768 }
769 
770 static PyObject *
r_PyLong(RFILE * p)771 r_PyLong(RFILE *p)
772 {
773     PyLongObject *ob;
774     long n, size, i;
775     int j, md, shorts_in_top_digit;
776     digit d;
777 
778     n = r_long(p);
779     if (PyErr_Occurred())
780         return NULL;
781     if (n == 0)
782         return (PyObject *)_PyLong_New(0);
783     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
784         PyErr_SetString(PyExc_ValueError,
785                        "bad marshal data (long size out of range)");
786         return NULL;
787     }
788 
789     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
790     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
791     ob = _PyLong_New(size);
792     if (ob == NULL)
793         return NULL;
794 
795     Py_SET_SIZE(ob, n > 0 ? size : -size);
796 
797     for (i = 0; i < size-1; i++) {
798         d = 0;
799         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
800             md = r_short(p);
801             if (PyErr_Occurred()) {
802                 Py_DECREF(ob);
803                 return NULL;
804             }
805             if (md < 0 || md > PyLong_MARSHAL_BASE)
806                 goto bad_digit;
807             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
808         }
809         ob->ob_digit[i] = d;
810     }
811 
812     d = 0;
813     for (j=0; j < shorts_in_top_digit; j++) {
814         md = r_short(p);
815         if (PyErr_Occurred()) {
816             Py_DECREF(ob);
817             return NULL;
818         }
819         if (md < 0 || md > PyLong_MARSHAL_BASE)
820             goto bad_digit;
821         /* topmost marshal digit should be nonzero */
822         if (md == 0 && j == shorts_in_top_digit - 1) {
823             Py_DECREF(ob);
824             PyErr_SetString(PyExc_ValueError,
825                 "bad marshal data (unnormalized long data)");
826             return NULL;
827         }
828         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
829     }
830     if (PyErr_Occurred()) {
831         Py_DECREF(ob);
832         return NULL;
833     }
834     /* top digit should be nonzero, else the resulting PyLong won't be
835        normalized */
836     ob->ob_digit[size-1] = d;
837     return (PyObject *)ob;
838   bad_digit:
839     Py_DECREF(ob);
840     PyErr_SetString(PyExc_ValueError,
841                     "bad marshal data (digit out of range in long)");
842     return NULL;
843 }
844 
845 static double
r_float_bin(RFILE * p)846 r_float_bin(RFILE *p)
847 {
848     const unsigned char *buf = (const unsigned char *) r_string(8, p);
849     if (buf == NULL)
850         return -1;
851     return _PyFloat_Unpack8(buf, 1);
852 }
853 
854 /* Issue #33720: Disable inlining for reducing the C stack consumption
855    on PGO builds. */
856 _Py_NO_INLINE static double
r_float_str(RFILE * p)857 r_float_str(RFILE *p)
858 {
859     int n;
860     char buf[256];
861     const char *ptr;
862     n = r_byte(p);
863     if (n == EOF) {
864         PyErr_SetString(PyExc_EOFError,
865             "EOF read where object expected");
866         return -1;
867     }
868     ptr = r_string(n, p);
869     if (ptr == NULL) {
870         return -1;
871     }
872     memcpy(buf, ptr, n);
873     buf[n] = '\0';
874     return PyOS_string_to_double(buf, NULL, NULL);
875 }
876 
877 /* allocate the reflist index for a new object. Return -1 on failure */
878 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)879 r_ref_reserve(int flag, RFILE *p)
880 {
881     if (flag) { /* currently only FLAG_REF is defined */
882         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
883         if (idx >= 0x7ffffffe) {
884             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
885             return -1;
886         }
887         if (PyList_Append(p->refs, Py_None) < 0)
888             return -1;
889         return idx;
890     } else
891         return 0;
892 }
893 
894 /* insert the new object 'o' to the reflist at previously
895  * allocated index 'idx'.
896  * 'o' can be NULL, in which case nothing is done.
897  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
898  * if 'o' was non-NULL, and the function fails, 'o' is released and
899  * NULL returned. This simplifies error checking at the call site since
900  * a single test for NULL for the function result is enough.
901  */
902 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)903 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
904 {
905     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
906         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
907         Py_INCREF(o);
908         PyList_SET_ITEM(p->refs, idx, o);
909         Py_DECREF(tmp);
910     }
911     return o;
912 }
913 
914 /* combination of both above, used when an object can be
915  * created whenever it is seen in the file, as opposed to
916  * after having loaded its sub-objects.
917  */
918 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)919 r_ref(PyObject *o, int flag, RFILE *p)
920 {
921     assert(flag & FLAG_REF);
922     if (o == NULL)
923         return NULL;
924     if (PyList_Append(p->refs, o) < 0) {
925         Py_DECREF(o); /* release the new object */
926         return NULL;
927     }
928     return o;
929 }
930 
931 static PyObject *
r_object(RFILE * p)932 r_object(RFILE *p)
933 {
934     /* NULL is a valid return value, it does not necessarily means that
935        an exception is set. */
936     PyObject *v, *v2;
937     Py_ssize_t idx = 0;
938     long i, n;
939     int type, code = r_byte(p);
940     int flag, is_interned = 0;
941     PyObject *retval = NULL;
942 
943     if (code == EOF) {
944         PyErr_SetString(PyExc_EOFError,
945                         "EOF read where object expected");
946         return NULL;
947     }
948 
949     p->depth++;
950 
951     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
952         p->depth--;
953         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
954         return NULL;
955     }
956 
957     flag = code & FLAG_REF;
958     type = code & ~FLAG_REF;
959 
960 #define R_REF(O) do{\
961     if (flag) \
962         O = r_ref(O, flag, p);\
963 } while (0)
964 
965     switch (type) {
966 
967     case TYPE_NULL:
968         break;
969 
970     case TYPE_NONE:
971         Py_INCREF(Py_None);
972         retval = Py_None;
973         break;
974 
975     case TYPE_STOPITER:
976         Py_INCREF(PyExc_StopIteration);
977         retval = PyExc_StopIteration;
978         break;
979 
980     case TYPE_ELLIPSIS:
981         Py_INCREF(Py_Ellipsis);
982         retval = Py_Ellipsis;
983         break;
984 
985     case TYPE_FALSE:
986         Py_INCREF(Py_False);
987         retval = Py_False;
988         break;
989 
990     case TYPE_TRUE:
991         Py_INCREF(Py_True);
992         retval = Py_True;
993         break;
994 
995     case TYPE_INT:
996         n = r_long(p);
997         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
998         R_REF(retval);
999         break;
1000 
1001     case TYPE_INT64:
1002         retval = r_long64(p);
1003         R_REF(retval);
1004         break;
1005 
1006     case TYPE_LONG:
1007         retval = r_PyLong(p);
1008         R_REF(retval);
1009         break;
1010 
1011     case TYPE_FLOAT:
1012         {
1013             double x = r_float_str(p);
1014             if (x == -1.0 && PyErr_Occurred())
1015                 break;
1016             retval = PyFloat_FromDouble(x);
1017             R_REF(retval);
1018             break;
1019         }
1020 
1021     case TYPE_BINARY_FLOAT:
1022         {
1023             double x = r_float_bin(p);
1024             if (x == -1.0 && PyErr_Occurred())
1025                 break;
1026             retval = PyFloat_FromDouble(x);
1027             R_REF(retval);
1028             break;
1029         }
1030 
1031     case TYPE_COMPLEX:
1032         {
1033             Py_complex c;
1034             c.real = r_float_str(p);
1035             if (c.real == -1.0 && PyErr_Occurred())
1036                 break;
1037             c.imag = r_float_str(p);
1038             if (c.imag == -1.0 && PyErr_Occurred())
1039                 break;
1040             retval = PyComplex_FromCComplex(c);
1041             R_REF(retval);
1042             break;
1043         }
1044 
1045     case TYPE_BINARY_COMPLEX:
1046         {
1047             Py_complex c;
1048             c.real = r_float_bin(p);
1049             if (c.real == -1.0 && PyErr_Occurred())
1050                 break;
1051             c.imag = r_float_bin(p);
1052             if (c.imag == -1.0 && PyErr_Occurred())
1053                 break;
1054             retval = PyComplex_FromCComplex(c);
1055             R_REF(retval);
1056             break;
1057         }
1058 
1059     case TYPE_STRING:
1060         {
1061             const char *ptr;
1062             n = r_long(p);
1063             if (PyErr_Occurred())
1064                 break;
1065             if (n < 0 || n > SIZE32_MAX) {
1066                 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1067                 break;
1068             }
1069             v = PyBytes_FromStringAndSize((char *)NULL, n);
1070             if (v == NULL)
1071                 break;
1072             ptr = r_string(n, p);
1073             if (ptr == NULL) {
1074                 Py_DECREF(v);
1075                 break;
1076             }
1077             memcpy(PyBytes_AS_STRING(v), ptr, n);
1078             retval = v;
1079             R_REF(retval);
1080             break;
1081         }
1082 
1083     case TYPE_ASCII_INTERNED:
1084         is_interned = 1;
1085         /* fall through */
1086     case TYPE_ASCII:
1087         n = r_long(p);
1088         if (PyErr_Occurred())
1089             break;
1090         if (n < 0 || n > SIZE32_MAX) {
1091             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1092             break;
1093         }
1094         goto _read_ascii;
1095 
1096     case TYPE_SHORT_ASCII_INTERNED:
1097         is_interned = 1;
1098         /* fall through */
1099     case TYPE_SHORT_ASCII:
1100         n = r_byte(p);
1101         if (n == EOF) {
1102             PyErr_SetString(PyExc_EOFError,
1103                 "EOF read where object expected");
1104             break;
1105         }
1106     _read_ascii:
1107         {
1108             const char *ptr;
1109             ptr = r_string(n, p);
1110             if (ptr == NULL)
1111                 break;
1112             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1113             if (v == NULL)
1114                 break;
1115             if (is_interned)
1116                 PyUnicode_InternInPlace(&v);
1117             retval = v;
1118             R_REF(retval);
1119             break;
1120         }
1121 
1122     case TYPE_INTERNED:
1123         is_interned = 1;
1124         /* fall through */
1125     case TYPE_UNICODE:
1126         {
1127         const char *buffer;
1128 
1129         n = r_long(p);
1130         if (PyErr_Occurred())
1131             break;
1132         if (n < 0 || n > SIZE32_MAX) {
1133             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1134             break;
1135         }
1136         if (n != 0) {
1137             buffer = r_string(n, p);
1138             if (buffer == NULL)
1139                 break;
1140             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1141         }
1142         else {
1143             v = PyUnicode_New(0, 0);
1144         }
1145         if (v == NULL)
1146             break;
1147         if (is_interned)
1148             PyUnicode_InternInPlace(&v);
1149         retval = v;
1150         R_REF(retval);
1151         break;
1152         }
1153 
1154     case TYPE_SMALL_TUPLE:
1155         n = (unsigned char) r_byte(p);
1156         if (PyErr_Occurred())
1157             break;
1158         goto _read_tuple;
1159     case TYPE_TUPLE:
1160         n = r_long(p);
1161         if (PyErr_Occurred())
1162             break;
1163         if (n < 0 || n > SIZE32_MAX) {
1164             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1165             break;
1166         }
1167     _read_tuple:
1168         v = PyTuple_New(n);
1169         R_REF(v);
1170         if (v == NULL)
1171             break;
1172 
1173         for (i = 0; i < n; i++) {
1174             v2 = r_object(p);
1175             if ( v2 == NULL ) {
1176                 if (!PyErr_Occurred())
1177                     PyErr_SetString(PyExc_TypeError,
1178                         "NULL object in marshal data for tuple");
1179                 Py_DECREF(v);
1180                 v = NULL;
1181                 break;
1182             }
1183             PyTuple_SET_ITEM(v, i, v2);
1184         }
1185         retval = v;
1186         break;
1187 
1188     case TYPE_LIST:
1189         n = r_long(p);
1190         if (PyErr_Occurred())
1191             break;
1192         if (n < 0 || n > SIZE32_MAX) {
1193             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1194             break;
1195         }
1196         v = PyList_New(n);
1197         R_REF(v);
1198         if (v == NULL)
1199             break;
1200         for (i = 0; i < n; i++) {
1201             v2 = r_object(p);
1202             if ( v2 == NULL ) {
1203                 if (!PyErr_Occurred())
1204                     PyErr_SetString(PyExc_TypeError,
1205                         "NULL object in marshal data for list");
1206                 Py_DECREF(v);
1207                 v = NULL;
1208                 break;
1209             }
1210             PyList_SET_ITEM(v, i, v2);
1211         }
1212         retval = v;
1213         break;
1214 
1215     case TYPE_DICT:
1216         v = PyDict_New();
1217         R_REF(v);
1218         if (v == NULL)
1219             break;
1220         for (;;) {
1221             PyObject *key, *val;
1222             key = r_object(p);
1223             if (key == NULL)
1224                 break;
1225             val = r_object(p);
1226             if (val == NULL) {
1227                 Py_DECREF(key);
1228                 break;
1229             }
1230             if (PyDict_SetItem(v, key, val) < 0) {
1231                 Py_DECREF(key);
1232                 Py_DECREF(val);
1233                 break;
1234             }
1235             Py_DECREF(key);
1236             Py_DECREF(val);
1237         }
1238         if (PyErr_Occurred()) {
1239             Py_DECREF(v);
1240             v = NULL;
1241         }
1242         retval = v;
1243         break;
1244 
1245     case TYPE_SET:
1246     case TYPE_FROZENSET:
1247         n = r_long(p);
1248         if (PyErr_Occurred())
1249             break;
1250         if (n < 0 || n > SIZE32_MAX) {
1251             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1252             break;
1253         }
1254 
1255         if (n == 0 && type == TYPE_FROZENSET) {
1256             /* call frozenset() to get the empty frozenset singleton */
1257             v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1258             if (v == NULL)
1259                 break;
1260             R_REF(v);
1261             retval = v;
1262         }
1263         else {
1264             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1265             if (type == TYPE_SET) {
1266                 R_REF(v);
1267             } else {
1268                 /* must use delayed registration of frozensets because they must
1269                  * be init with a refcount of 1
1270                  */
1271                 idx = r_ref_reserve(flag, p);
1272                 if (idx < 0)
1273                     Py_CLEAR(v); /* signal error */
1274             }
1275             if (v == NULL)
1276                 break;
1277 
1278             for (i = 0; i < n; i++) {
1279                 v2 = r_object(p);
1280                 if ( v2 == NULL ) {
1281                     if (!PyErr_Occurred())
1282                         PyErr_SetString(PyExc_TypeError,
1283                             "NULL object in marshal data for set");
1284                     Py_DECREF(v);
1285                     v = NULL;
1286                     break;
1287                 }
1288                 if (PySet_Add(v, v2) == -1) {
1289                     Py_DECREF(v);
1290                     Py_DECREF(v2);
1291                     v = NULL;
1292                     break;
1293                 }
1294                 Py_DECREF(v2);
1295             }
1296             if (type != TYPE_SET)
1297                 v = r_ref_insert(v, idx, flag, p);
1298             retval = v;
1299         }
1300         break;
1301 
1302     case TYPE_CODE:
1303         {
1304             int argcount;
1305             int posonlyargcount;
1306             int kwonlyargcount;
1307             int nlocals;
1308             int stacksize;
1309             int flags;
1310             PyObject *code = NULL;
1311             PyObject *consts = NULL;
1312             PyObject *names = NULL;
1313             PyObject *varnames = NULL;
1314             PyObject *freevars = NULL;
1315             PyObject *cellvars = NULL;
1316             PyObject *filename = NULL;
1317             PyObject *name = NULL;
1318             int firstlineno;
1319             PyObject *linetable = NULL;
1320 
1321             idx = r_ref_reserve(flag, p);
1322             if (idx < 0)
1323                 break;
1324 
1325             v = NULL;
1326 
1327             /* XXX ignore long->int overflows for now */
1328             argcount = (int)r_long(p);
1329             if (PyErr_Occurred())
1330                 goto code_error;
1331             posonlyargcount = (int)r_long(p);
1332             if (PyErr_Occurred()) {
1333                 goto code_error;
1334             }
1335             kwonlyargcount = (int)r_long(p);
1336             if (PyErr_Occurred())
1337                 goto code_error;
1338             nlocals = (int)r_long(p);
1339             if (PyErr_Occurred())
1340                 goto code_error;
1341             stacksize = (int)r_long(p);
1342             if (PyErr_Occurred())
1343                 goto code_error;
1344             flags = (int)r_long(p);
1345             if (PyErr_Occurred())
1346                 goto code_error;
1347             code = r_object(p);
1348             if (code == NULL)
1349                 goto code_error;
1350             consts = r_object(p);
1351             if (consts == NULL)
1352                 goto code_error;
1353             names = r_object(p);
1354             if (names == NULL)
1355                 goto code_error;
1356             varnames = r_object(p);
1357             if (varnames == NULL)
1358                 goto code_error;
1359             freevars = r_object(p);
1360             if (freevars == NULL)
1361                 goto code_error;
1362             cellvars = r_object(p);
1363             if (cellvars == NULL)
1364                 goto code_error;
1365             filename = r_object(p);
1366             if (filename == NULL)
1367                 goto code_error;
1368             name = r_object(p);
1369             if (name == NULL)
1370                 goto code_error;
1371             firstlineno = (int)r_long(p);
1372             if (firstlineno == -1 && PyErr_Occurred())
1373                 break;
1374             linetable = r_object(p);
1375             if (linetable == NULL)
1376                 goto code_error;
1377 
1378             v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1379                             argcount, posonlyargcount, kwonlyargcount,
1380                             nlocals, stacksize, flags,
1381                             code, consts, names, varnames,
1382                             freevars, cellvars, filename, name,
1383                             firstlineno, linetable);
1384             v = r_ref_insert(v, idx, flag, p);
1385 
1386           code_error:
1387             Py_XDECREF(code);
1388             Py_XDECREF(consts);
1389             Py_XDECREF(names);
1390             Py_XDECREF(varnames);
1391             Py_XDECREF(freevars);
1392             Py_XDECREF(cellvars);
1393             Py_XDECREF(filename);
1394             Py_XDECREF(name);
1395             Py_XDECREF(linetable);
1396         }
1397         retval = v;
1398         break;
1399 
1400     case TYPE_REF:
1401         n = r_long(p);
1402         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1403             if (n == -1 && PyErr_Occurred())
1404                 break;
1405             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1406             break;
1407         }
1408         v = PyList_GET_ITEM(p->refs, n);
1409         if (v == Py_None) {
1410             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1411             break;
1412         }
1413         Py_INCREF(v);
1414         retval = v;
1415         break;
1416 
1417     default:
1418         /* Bogus data got written, which isn't ideal.
1419            This will let you keep working and recover. */
1420         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1421         break;
1422 
1423     }
1424     p->depth--;
1425     return retval;
1426 }
1427 
1428 static PyObject *
read_object(RFILE * p)1429 read_object(RFILE *p)
1430 {
1431     PyObject *v;
1432     if (PyErr_Occurred()) {
1433         fprintf(stderr, "XXX readobject called with exception set\n");
1434         return NULL;
1435     }
1436     if (p->ptr && p->end) {
1437         if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
1438             return NULL;
1439         }
1440     } else if (p->fp || p->readable) {
1441         if (PySys_Audit("marshal.load", NULL) < 0) {
1442             return NULL;
1443         }
1444     }
1445     v = r_object(p);
1446     if (v == NULL && !PyErr_Occurred())
1447         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1448     return v;
1449 }
1450 
1451 int
PyMarshal_ReadShortFromFile(FILE * fp)1452 PyMarshal_ReadShortFromFile(FILE *fp)
1453 {
1454     RFILE rf;
1455     int res;
1456     assert(fp);
1457     rf.readable = NULL;
1458     rf.fp = fp;
1459     rf.end = rf.ptr = NULL;
1460     rf.buf = NULL;
1461     res = r_short(&rf);
1462     if (rf.buf != NULL)
1463         PyMem_Free(rf.buf);
1464     return res;
1465 }
1466 
1467 long
PyMarshal_ReadLongFromFile(FILE * fp)1468 PyMarshal_ReadLongFromFile(FILE *fp)
1469 {
1470     RFILE rf;
1471     long res;
1472     rf.fp = fp;
1473     rf.readable = NULL;
1474     rf.ptr = rf.end = NULL;
1475     rf.buf = NULL;
1476     res = r_long(&rf);
1477     if (rf.buf != NULL)
1478         PyMem_Free(rf.buf);
1479     return res;
1480 }
1481 
1482 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1483 static off_t
getfilesize(FILE * fp)1484 getfilesize(FILE *fp)
1485 {
1486     struct _Py_stat_struct st;
1487     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1488         return -1;
1489 #if SIZEOF_OFF_T == 4
1490     else if (st.st_size >= INT_MAX)
1491         return (off_t)INT_MAX;
1492 #endif
1493     else
1494         return (off_t)st.st_size;
1495 }
1496 
1497 /* If we can get the size of the file up-front, and it's reasonably small,
1498  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1499  * than reading a byte at a time from file; speeds .pyc imports.
1500  * CAUTION:  since this may read the entire remainder of the file, don't
1501  * call it unless you know you're done with the file.
1502  */
1503 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1504 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1505 {
1506 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1507 #define REASONABLE_FILE_LIMIT (1L << 18)
1508     off_t filesize;
1509     filesize = getfilesize(fp);
1510     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1511         char* pBuf = (char *)PyMem_Malloc(filesize);
1512         if (pBuf != NULL) {
1513             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1514             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1515             PyMem_Free(pBuf);
1516             return v;
1517         }
1518 
1519     }
1520     /* We don't have fstat, or we do but the file is larger than
1521      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1522      */
1523     return PyMarshal_ReadObjectFromFile(fp);
1524 
1525 #undef REASONABLE_FILE_LIMIT
1526 }
1527 
1528 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1529 PyMarshal_ReadObjectFromFile(FILE *fp)
1530 {
1531     RFILE rf;
1532     PyObject *result;
1533     rf.fp = fp;
1534     rf.readable = NULL;
1535     rf.depth = 0;
1536     rf.ptr = rf.end = NULL;
1537     rf.buf = NULL;
1538     rf.refs = PyList_New(0);
1539     if (rf.refs == NULL)
1540         return NULL;
1541     result = read_object(&rf);
1542     Py_DECREF(rf.refs);
1543     if (rf.buf != NULL)
1544         PyMem_Free(rf.buf);
1545     return result;
1546 }
1547 
1548 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1549 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1550 {
1551     RFILE rf;
1552     PyObject *result;
1553     rf.fp = NULL;
1554     rf.readable = NULL;
1555     rf.ptr = str;
1556     rf.end = str + len;
1557     rf.buf = NULL;
1558     rf.depth = 0;
1559     rf.refs = PyList_New(0);
1560     if (rf.refs == NULL)
1561         return NULL;
1562     result = read_object(&rf);
1563     Py_DECREF(rf.refs);
1564     if (rf.buf != NULL)
1565         PyMem_Free(rf.buf);
1566     return result;
1567 }
1568 
1569 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1570 PyMarshal_WriteObjectToString(PyObject *x, int version)
1571 {
1572     WFILE wf;
1573 
1574     if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
1575         return NULL;
1576     }
1577     memset(&wf, 0, sizeof(wf));
1578     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1579     if (wf.str == NULL)
1580         return NULL;
1581     wf.ptr = wf.buf = PyBytes_AS_STRING(wf.str);
1582     wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
1583     wf.error = WFERR_OK;
1584     wf.version = version;
1585     if (w_init_refs(&wf, version)) {
1586         Py_DECREF(wf.str);
1587         return NULL;
1588     }
1589     w_object(x, &wf);
1590     w_clear_refs(&wf);
1591     if (wf.str != NULL) {
1592         const char *base = PyBytes_AS_STRING(wf.str);
1593         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1594             return NULL;
1595     }
1596     if (wf.error != WFERR_OK) {
1597         Py_XDECREF(wf.str);
1598         if (wf.error == WFERR_NOMEMORY)
1599             PyErr_NoMemory();
1600         else
1601             PyErr_SetString(PyExc_ValueError,
1602               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1603                :"object too deeply nested to marshal");
1604         return NULL;
1605     }
1606     return wf.str;
1607 }
1608 
1609 /* And an interface for Python programs... */
1610 /*[clinic input]
1611 marshal.dump
1612 
1613     value: object
1614         Must be a supported type.
1615     file: object
1616         Must be a writeable binary file.
1617     version: int(c_default="Py_MARSHAL_VERSION") = version
1618         Indicates the data format that dump should use.
1619     /
1620 
1621 Write the value on the open file.
1622 
1623 If the value has (or contains an object that has) an unsupported type, a
1624 ValueError exception is raised - but garbage data will also be written
1625 to the file. The object will not be properly read back by load().
1626 [clinic start generated code]*/
1627 
1628 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1629 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1630                   int version)
1631 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1632 {
1633     /* XXX Quick hack -- need to do this differently */
1634     PyObject *s;
1635     PyObject *res;
1636     _Py_IDENTIFIER(write);
1637 
1638     s = PyMarshal_WriteObjectToString(value, version);
1639     if (s == NULL)
1640         return NULL;
1641     res = _PyObject_CallMethodIdOneArg(file, &PyId_write, s);
1642     Py_DECREF(s);
1643     return res;
1644 }
1645 
1646 /*[clinic input]
1647 marshal.load
1648 
1649     file: object
1650         Must be readable binary file.
1651     /
1652 
1653 Read one value from the open file and return it.
1654 
1655 If no valid value is read (e.g. because the data has a different Python
1656 version's incompatible marshal format), raise EOFError, ValueError or
1657 TypeError.
1658 
1659 Note: If an object containing an unsupported type was marshalled with
1660 dump(), load() will substitute None for the unmarshallable type.
1661 [clinic start generated code]*/
1662 
1663 static PyObject *
marshal_load(PyObject * module,PyObject * file)1664 marshal_load(PyObject *module, PyObject *file)
1665 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1666 {
1667     PyObject *data, *result;
1668     _Py_IDENTIFIER(read);
1669     RFILE rf;
1670 
1671     /*
1672      * Make a call to the read method, but read zero bytes.
1673      * This is to ensure that the object passed in at least
1674      * has a read method which returns bytes.
1675      * This can be removed if we guarantee good error handling
1676      * for r_string()
1677      */
1678     data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1679     if (data == NULL)
1680         return NULL;
1681     if (!PyBytes_Check(data)) {
1682         PyErr_Format(PyExc_TypeError,
1683                      "file.read() returned not bytes but %.100s",
1684                      Py_TYPE(data)->tp_name);
1685         result = NULL;
1686     }
1687     else {
1688         rf.depth = 0;
1689         rf.fp = NULL;
1690         rf.readable = file;
1691         rf.ptr = rf.end = NULL;
1692         rf.buf = NULL;
1693         if ((rf.refs = PyList_New(0)) != NULL) {
1694             result = read_object(&rf);
1695             Py_DECREF(rf.refs);
1696             if (rf.buf != NULL)
1697                 PyMem_Free(rf.buf);
1698         } else
1699             result = NULL;
1700     }
1701     Py_DECREF(data);
1702     return result;
1703 }
1704 
1705 /*[clinic input]
1706 marshal.dumps
1707 
1708     value: object
1709         Must be a supported type.
1710     version: int(c_default="Py_MARSHAL_VERSION") = version
1711         Indicates the data format that dumps should use.
1712     /
1713 
1714 Return the bytes object that would be written to a file by dump(value, file).
1715 
1716 Raise a ValueError exception if value has (or contains an object that has) an
1717 unsupported type.
1718 [clinic start generated code]*/
1719 
1720 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1721 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1722 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1723 {
1724     return PyMarshal_WriteObjectToString(value, version);
1725 }
1726 
1727 /*[clinic input]
1728 marshal.loads
1729 
1730     bytes: Py_buffer
1731     /
1732 
1733 Convert the bytes-like object to a value.
1734 
1735 If no valid value is found, raise EOFError, ValueError or TypeError.  Extra
1736 bytes in the input are ignored.
1737 [clinic start generated code]*/
1738 
1739 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1740 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1741 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1742 {
1743     RFILE rf;
1744     char *s = bytes->buf;
1745     Py_ssize_t n = bytes->len;
1746     PyObject* result;
1747     rf.fp = NULL;
1748     rf.readable = NULL;
1749     rf.ptr = s;
1750     rf.end = s + n;
1751     rf.depth = 0;
1752     if ((rf.refs = PyList_New(0)) == NULL)
1753         return NULL;
1754     result = read_object(&rf);
1755     Py_DECREF(rf.refs);
1756     return result;
1757 }
1758 
1759 static PyMethodDef marshal_methods[] = {
1760     MARSHAL_DUMP_METHODDEF
1761     MARSHAL_LOAD_METHODDEF
1762     MARSHAL_DUMPS_METHODDEF
1763     MARSHAL_LOADS_METHODDEF
1764     {NULL,              NULL}           /* sentinel */
1765 };
1766 
1767 
1768 PyDoc_STRVAR(module_doc,
1769 "This module contains functions that can read and write Python values in\n\
1770 a binary format. The format is specific to Python, but independent of\n\
1771 machine architecture issues.\n\
1772 \n\
1773 Not all Python object types are supported; in general, only objects\n\
1774 whose value is independent from a particular invocation of Python can be\n\
1775 written and read by this module. The following types are supported:\n\
1776 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1777 tuples, lists, sets, dictionaries, and code objects, where it\n\
1778 should be understood that tuples, lists and dictionaries are only\n\
1779 supported as long as the values contained therein are themselves\n\
1780 supported; and recursive lists and dictionaries should not be written\n\
1781 (they will cause infinite loops).\n\
1782 \n\
1783 Variables:\n\
1784 \n\
1785 version -- indicates the format that the module uses. Version 0 is the\n\
1786     historical format, version 1 shares interned strings and version 2\n\
1787     uses a binary format for floating point numbers.\n\
1788     Version 3 shares common object references (New in version 3.4).\n\
1789 \n\
1790 Functions:\n\
1791 \n\
1792 dump() -- write value to a file\n\
1793 load() -- read value from a file\n\
1794 dumps() -- marshal value as a bytes object\n\
1795 loads() -- read value from a bytes-like object");
1796 
1797 
1798 static int
marshal_module_exec(PyObject * mod)1799 marshal_module_exec(PyObject *mod)
1800 {
1801     if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1802         return -1;
1803     }
1804     return 0;
1805 }
1806 
1807 static PyModuleDef_Slot marshalmodule_slots[] = {
1808     {Py_mod_exec, marshal_module_exec},
1809     {0, NULL}
1810 };
1811 
1812 static struct PyModuleDef marshalmodule = {
1813     PyModuleDef_HEAD_INIT,
1814     .m_name = "marshal",
1815     .m_doc = module_doc,
1816     .m_methods = marshal_methods,
1817     .m_slots = marshalmodule_slots,
1818 };
1819 
1820 PyMODINIT_FUNC
PyMarshal_Init(void)1821 PyMarshal_Init(void)
1822 {
1823     return PyModuleDef_Init(&marshalmodule);
1824 }
1825