1 
2 /* Write Python objects to files and read them back.
3    This is intended for writing and reading compiled Python code only;
4    a true persistent storage facility would be much harder, since
5    it would have to take circular links and sharing into account. */
6 
7 #define PY_SSIZE_T_CLEAN
8 
9 #include "Python.h"
10 #include "longintrepr.h"
11 #include "code.h"
12 #include "marshal.h"
13 
/* Absolute value of x.  The argument appears more than once in the
   expansion, so it must not have side effects. */
#define ABS(x) ((x) < 0 ? -(x) : (x))
15 
/* Upper bound on the nesting depth of a marshalled object.  Both w_object()
 * and r_object() compare their running depth counter against this value and
 * report an error instead of recursing further, so that a dangerously deep
 * object cannot overflow the C stack and crash the interpreter.
 * On Windows debug builds, reduce this value.
 */
#if defined(MS_WINDOWS) && defined(_DEBUG)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
26 
/* One-byte type codes.  w_object() writes one of these in front of every
   value, and r_object() switches on the byte it reads back. */
#define TYPE_NULL               '0'  /* NULL pointer; also terminates a dict */
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'  /* int written with w_long (4 bytes) */
#define TYPE_INT64              'I'  /* int written with w_long64 (8 bytes) */
#define TYPE_FLOAT              'f'  /* float as length-prefixed text */
#define TYPE_BINARY_FLOAT       'g'  /* float as 8 packed bytes */
#define TYPE_COMPLEX            'x'  /* complex as two length-prefixed texts */
#define TYPE_BINARY_COMPLEX     'y'  /* complex as two 8-byte packed values */
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'  /* interned string, added to the strings table */
#define TYPE_STRINGREF          'R'  /* index into the strings table */
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'  /* unicode string, stored UTF-8 encoded */
#define TYPE_UNKNOWN            '?'  /* written for objects that cannot be marshalled */
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
51 
/* Values stored in WFILE.error by the writing functions. */
#define WFERR_OK 0              /* no error recorded */
#define WFERR_UNMARSHALLABLE 1  /* object (or a size) cannot be written */
#define WFERR_NESTEDTOODEEP 2   /* depth exceeded MAX_MARSHAL_STACK_DEPTH */
#define WFERR_NOMEMORY 3        /* a temporary buffer could not be allocated */
56 
/* State shared by the marshal writer (and, via the RFILE typedef, the
   reader).  Data goes either to/from a stdio stream (fp != NULL) or to/from
   an in-memory buffer delimited by ptr and end. */
typedef struct {
    FILE *fp;   /* stdio stream, or NULL for the in-memory buffer */
    int error;  /* see WFERR_* values */
    int depth;  /* current nesting depth of w_object()/r_object() */
    /* If fp == NULL, the following are valid: */
    PyObject *str;  /* string object being written into; grown by w_more() */
    char *ptr;      /* next byte to write or read */
    char *end;      /* one past the last usable byte */
    PyObject *strings; /* dict on marshal, list on unmarshal */
    int version;    /* format version; > 1 selects binary floats when writing */
} WFILE;
68 
/* Write the single byte c to the output described by p (a WFILE *).
   With a stdio stream the byte goes through putc(); otherwise it is stored
   in the in-memory buffer, calling w_more() to grow the buffer when it is
   full.  The body is wrapped in do { } while (0) so the macro behaves as one
   statement wherever it is used.  p is evaluated more than once and must be
   free of side effects; c is evaluated exactly once. */
#define w_byte(c, p) do {                                       \
        if (((p)->fp)) putc((c), (p)->fp);                      \
        else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c);       \
        else w_more((c), (p));                                  \
    } while (0)
72 
73 static void
w_more(int c,WFILE * p)74 w_more(int c, WFILE *p)
75 {
76     Py_ssize_t size, newsize;
77     if (p->str == NULL)
78         return; /* An error already occurred */
79     size = PyString_Size(p->str);
80     newsize = size + size + 1024;
81     if (newsize > 32*1024*1024) {
82         newsize = size + (size >> 3);           /* 12.5% overallocation */
83     }
84     if (_PyString_Resize(&p->str, newsize) != 0) {
85         p->ptr = p->end = NULL;
86     }
87     else {
88         p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size;
89         p->end =
90             PyString_AS_STRING((PyStringObject *)p->str) + newsize;
91         *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
92     }
93 }
94 
95 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)96 w_string(const char *s, Py_ssize_t n, WFILE *p)
97 {
98     if (p->fp != NULL) {
99         fwrite(s, 1, n, p->fp);
100     }
101     else {
102         while (--n >= 0) {
103             w_byte(*s, p);
104             s++;
105         }
106     }
107 }
108 
109 static void
w_short(int x,WFILE * p)110 w_short(int x, WFILE *p)
111 {
112     w_byte((char)( x      & 0xff), p);
113     w_byte((char)((x>> 8) & 0xff), p);
114 }
115 
116 static void
w_long(long x,WFILE * p)117 w_long(long x, WFILE *p)
118 {
119     w_byte((char)( x      & 0xff), p);
120     w_byte((char)((x>> 8) & 0xff), p);
121     w_byte((char)((x>>16) & 0xff), p);
122     w_byte((char)((x>>24) & 0xff), p);
123 }
124 
#if SIZEOF_LONG > 4
/* Write x as eight bytes: the low 32-bit half first, then the high half,
   each emitted by w_long().  Only compiled where long is wider than 4 bytes. */
static void
w_long64(long x, WFILE *p)
{
    const long high_half = x >> 32;

    w_long(x, p);
    w_long(high_half, p);
}
#endif
133 
/* Largest size or count the format accepts: sizes are written with w_long()
   (four bytes) and the reader rejects negative values. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size n to p with w_long().
   Where size_t is wider than 4 bytes, a size above SIZE32_MAX is not
   written; instead the macro decrements p->depth, sets p->error to
   WFERR_UNMARSHALLABLE and RETURNS from the enclosing function.  It can
   therefore only be used in a function returning void, and the depth
   decrement only balances inside a function that incremented p->depth.
   Where size_t is 4 bytes or narrower, W_SIZE is simply w_long. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
148 
149 static void
w_pstring(const char * s,Py_ssize_t n,WFILE * p)150 w_pstring(const char *s, Py_ssize_t n, WFILE *p)
151 {
152         W_SIZE(n, p);
153         w_string(s, n, p);
154 }
155 
/* We assume that Python longs are stored internally in a base that is a
   power of 2**15; for the sake of portability we always read and write
   them in base exactly 2**15. */

/* Number of bits in one marshalled digit. */
#define PyLong_MARSHAL_SHIFT 15
/* The marshal base, 2**15; valid marshalled digits are below this value. */
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
/* Bit mask selecting one marshalled digit. */
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* How many marshalled digits make up one internal PyLong digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
167 
168 static void
w_PyLong(const PyLongObject * ob,WFILE * p)169 w_PyLong(const PyLongObject *ob, WFILE *p)
170 {
171     Py_ssize_t i, j, n, l;
172     digit d;
173 
174     w_byte(TYPE_LONG, p);
175     if (Py_SIZE(ob) == 0) {
176         w_long((long)0, p);
177         return;
178     }
179 
180     /* set l to number of base PyLong_MARSHAL_BASE digits */
181     n = ABS(Py_SIZE(ob));
182     l = (n-1) * PyLong_MARSHAL_RATIO;
183     d = ob->ob_digit[n-1];
184     assert(d != 0); /* a PyLong is always normalized */
185     do {
186         d >>= PyLong_MARSHAL_SHIFT;
187         l++;
188     } while (d != 0);
189     if (l > SIZE32_MAX) {
190         p->depth--;
191         p->error = WFERR_UNMARSHALLABLE;
192         return;
193     }
194     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
195 
196     for (i=0; i < n-1; i++) {
197         d = ob->ob_digit[i];
198         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
199             w_short(d & PyLong_MARSHAL_MASK, p);
200             d >>= PyLong_MARSHAL_SHIFT;
201         }
202         assert (d == 0);
203     }
204     d = ob->ob_digit[n-1];
205     do {
206         w_short(d & PyLong_MARSHAL_MASK, p);
207         d >>= PyLong_MARSHAL_SHIFT;
208     } while (d != 0);
209 }
210 
211 static void
w_object(PyObject * v,WFILE * p)212 w_object(PyObject *v, WFILE *p)
213 {
214     Py_ssize_t i, n;
215 
216     p->depth++;
217 
218     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
219         p->error = WFERR_NESTEDTOODEEP;
220     }
221     else if (v == NULL) {
222         w_byte(TYPE_NULL, p);
223     }
224     else if (v == Py_None) {
225         w_byte(TYPE_NONE, p);
226     }
227     else if (v == PyExc_StopIteration) {
228         w_byte(TYPE_STOPITER, p);
229     }
230     else if (v == Py_Ellipsis) {
231         w_byte(TYPE_ELLIPSIS, p);
232     }
233     else if (v == Py_False) {
234         w_byte(TYPE_FALSE, p);
235     }
236     else if (v == Py_True) {
237         w_byte(TYPE_TRUE, p);
238     }
239     else if (PyInt_CheckExact(v)) {
240         long x = PyInt_AS_LONG((PyIntObject *)v);
241 #if SIZEOF_LONG > 4
242         long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
243         if (y && y != -1) {
244             w_byte(TYPE_INT64, p);
245             w_long64(x, p);
246         }
247         else
248 #endif
249             {
250             w_byte(TYPE_INT, p);
251             w_long(x, p);
252         }
253     }
254     else if (PyLong_CheckExact(v)) {
255         PyLongObject *ob = (PyLongObject *)v;
256         w_PyLong(ob, p);
257     }
258     else if (PyFloat_CheckExact(v)) {
259         if (p->version > 1) {
260             unsigned char buf[8];
261             if (_PyFloat_Pack8(PyFloat_AsDouble(v),
262                                buf, 1) < 0) {
263                 p->error = WFERR_UNMARSHALLABLE;
264                 return;
265             }
266             w_byte(TYPE_BINARY_FLOAT, p);
267             w_string((char*)buf, 8, p);
268         }
269         else {
270             char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
271                                               'g', 17, 0, NULL);
272             if (!buf) {
273                 p->error = WFERR_NOMEMORY;
274                 return;
275             }
276             n = strlen(buf);
277             w_byte(TYPE_FLOAT, p);
278             w_byte((int)n, p);
279             w_string(buf, n, p);
280             PyMem_Free(buf);
281         }
282     }
283 #ifndef WITHOUT_COMPLEX
284     else if (PyComplex_CheckExact(v)) {
285         if (p->version > 1) {
286             unsigned char buf[8];
287             if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
288                                buf, 1) < 0) {
289                 p->error = WFERR_UNMARSHALLABLE;
290                 return;
291             }
292             w_byte(TYPE_BINARY_COMPLEX, p);
293             w_string((char*)buf, 8, p);
294             if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
295                                buf, 1) < 0) {
296                 p->error = WFERR_UNMARSHALLABLE;
297                 return;
298             }
299             w_string((char*)buf, 8, p);
300         }
301         else {
302             char *buf;
303             w_byte(TYPE_COMPLEX, p);
304             buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
305                                         'g', 17, 0, NULL);
306             if (!buf) {
307                 p->error = WFERR_NOMEMORY;
308                 return;
309             }
310             n = strlen(buf);
311             w_byte((int)n, p);
312             w_string(buf, n, p);
313             PyMem_Free(buf);
314             buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
315                                         'g', 17, 0, NULL);
316             if (!buf) {
317                 p->error = WFERR_NOMEMORY;
318                 return;
319             }
320             n = strlen(buf);
321             w_byte((int)n, p);
322             w_string(buf, n, p);
323             PyMem_Free(buf);
324         }
325     }
326 #endif
327     else if (PyString_CheckExact(v)) {
328         if (p->strings && PyString_CHECK_INTERNED(v)) {
329             PyObject *o = PyDict_GetItem(p->strings, v);
330             if (o) {
331                 long w = PyInt_AsLong(o);
332                 w_byte(TYPE_STRINGREF, p);
333                 w_long(w, p);
334                 goto exit;
335             }
336             else {
337                 int ok;
338                 o = PyInt_FromSsize_t(PyDict_Size(p->strings));
339                 ok = o &&
340                      PyDict_SetItem(p->strings, v, o) >= 0;
341                 Py_XDECREF(o);
342                 if (!ok) {
343                     p->depth--;
344                     p->error = WFERR_UNMARSHALLABLE;
345                     return;
346                 }
347                 w_byte(TYPE_INTERNED, p);
348             }
349         }
350         else {
351             w_byte(TYPE_STRING, p);
352         }
353         w_pstring(PyBytes_AS_STRING(v), PyString_GET_SIZE(v), p);
354     }
355 #ifdef Py_USING_UNICODE
356     else if (PyUnicode_CheckExact(v)) {
357         PyObject *utf8;
358         utf8 = PyUnicode_AsUTF8String(v);
359         if (utf8 == NULL) {
360             p->depth--;
361             p->error = WFERR_UNMARSHALLABLE;
362             return;
363         }
364         w_byte(TYPE_UNICODE, p);
365         w_pstring(PyString_AS_STRING(utf8), PyString_GET_SIZE(utf8), p);
366         Py_DECREF(utf8);
367     }
368 #endif
369     else if (PyTuple_CheckExact(v)) {
370         w_byte(TYPE_TUPLE, p);
371         n = PyTuple_Size(v);
372         W_SIZE(n, p);
373         for (i = 0; i < n; i++) {
374             w_object(PyTuple_GET_ITEM(v, i), p);
375         }
376     }
377     else if (PyList_CheckExact(v)) {
378         w_byte(TYPE_LIST, p);
379         n = PyList_GET_SIZE(v);
380         W_SIZE(n, p);
381         for (i = 0; i < n; i++) {
382             w_object(PyList_GET_ITEM(v, i), p);
383         }
384     }
385     else if (PyDict_CheckExact(v)) {
386         Py_ssize_t pos;
387         PyObject *key, *value;
388         w_byte(TYPE_DICT, p);
389         /* This one is NULL object terminated! */
390         pos = 0;
391         while (PyDict_Next(v, &pos, &key, &value)) {
392             w_object(key, p);
393             w_object(value, p);
394         }
395         w_object((PyObject *)NULL, p);
396     }
397     else if (PyAnySet_CheckExact(v)) {
398         PyObject *value, *it;
399 
400         if (PyObject_TypeCheck(v, &PySet_Type))
401             w_byte(TYPE_SET, p);
402         else
403             w_byte(TYPE_FROZENSET, p);
404         n = PyObject_Size(v);
405         if (n == -1) {
406             p->depth--;
407             p->error = WFERR_UNMARSHALLABLE;
408             return;
409         }
410         W_SIZE(n, p);
411         it = PyObject_GetIter(v);
412         if (it == NULL) {
413             p->depth--;
414             p->error = WFERR_UNMARSHALLABLE;
415             return;
416         }
417         while ((value = PyIter_Next(it)) != NULL) {
418             w_object(value, p);
419             Py_DECREF(value);
420         }
421         Py_DECREF(it);
422         if (PyErr_Occurred()) {
423             p->depth--;
424             p->error = WFERR_UNMARSHALLABLE;
425             return;
426         }
427     }
428     else if (PyCode_Check(v)) {
429         PyCodeObject *co = (PyCodeObject *)v;
430         w_byte(TYPE_CODE, p);
431         w_long(co->co_argcount, p);
432         w_long(co->co_nlocals, p);
433         w_long(co->co_stacksize, p);
434         w_long(co->co_flags, p);
435         w_object(co->co_code, p);
436         w_object(co->co_consts, p);
437         w_object(co->co_names, p);
438         w_object(co->co_varnames, p);
439         w_object(co->co_freevars, p);
440         w_object(co->co_cellvars, p);
441         w_object(co->co_filename, p);
442         w_object(co->co_name, p);
443         w_long(co->co_firstlineno, p);
444         w_object(co->co_lnotab, p);
445     }
446     else if (PyObject_CheckReadBuffer(v)) {
447         /* Write unknown buffer-style objects as a string */
448         char *s;
449         PyBufferProcs *pb = v->ob_type->tp_as_buffer;
450         w_byte(TYPE_STRING, p);
451         n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s);
452         w_pstring(s, n, p);
453     }
454     else {
455         w_byte(TYPE_UNKNOWN, p);
456         p->error = WFERR_UNMARSHALLABLE;
457     }
458    exit:
459     p->depth--;
460 }
461 
462 /* version currently has no effect for writing longs. */
463 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)464 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
465 {
466     WFILE wf;
467     wf.fp = fp;
468     wf.str = NULL;
469     wf.ptr = NULL;
470     wf.end = NULL;
471     wf.error = WFERR_OK;
472     wf.depth = 0;
473     wf.strings = NULL;
474     wf.version = version;
475     w_long(x, &wf);
476 }
477 
478 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)479 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
480 {
481     WFILE wf;
482     wf.fp = fp;
483     wf.str = NULL;
484     wf.ptr = NULL;
485     wf.end = NULL;
486     wf.error = WFERR_OK;
487     wf.depth = 0;
488     wf.strings = (version > 0) ? PyDict_New() : NULL;
489     wf.version = version;
490     w_object(x, &wf);
491     Py_XDECREF(wf.strings);
492 }
493 
typedef WFILE RFILE; /* Same struct with different invariants */

/* Read one byte from the in-memory buffer of p: the byte as an unsigned
   char value, or EOF once ptr has reached end. */
#define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)

/* Read one byte from p: from the stdio stream when p->fp is set, otherwise
   from the in-memory buffer.  Yields EOF when no byte is available. */
#define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
499 
500 static Py_ssize_t
r_string(char * s,Py_ssize_t n,RFILE * p)501 r_string(char *s, Py_ssize_t n, RFILE *p)
502 {
503     if (p->fp != NULL)
504         /* The result fits into int because it must be <=n. */
505         return fread(s, 1, n, p->fp);
506     if (p->end - p->ptr < n)
507         n = p->end - p->ptr;
508     memcpy(s, p->ptr, n);
509     p->ptr += n;
510     return n;
511 }
512 
513 static int
r_short(RFILE * p)514 r_short(RFILE *p)
515 {
516     register short x;
517     x = r_byte(p);
518     x |= r_byte(p) << 8;
519     /* Sign-extension, in case short greater than 16 bits */
520     x |= -(x & 0x8000);
521     return x;
522 }
523 
524 static long
r_long(RFILE * p)525 r_long(RFILE *p)
526 {
527     register long x;
528     register FILE *fp = p->fp;
529     if (fp) {
530         x = getc(fp);
531         x |= (long)getc(fp) << 8;
532         x |= (long)getc(fp) << 16;
533         x |= (long)getc(fp) << 24;
534     }
535     else {
536         x = rs_byte(p);
537         x |= (long)rs_byte(p) << 8;
538         x |= (long)rs_byte(p) << 16;
539         x |= (long)rs_byte(p) << 24;
540     }
541 #if SIZEOF_LONG > 4
542     /* Sign extension for 64-bit machines */
543     x |= -(x & 0x80000000L);
544 #endif
545     return x;
546 }
547 
548 /* r_long64 deals with the TYPE_INT64 code.  On a machine with
549    sizeof(long) > 4, it returns a Python int object, else a Python long
550    object.  Note that w_long64 writes out TYPE_INT if 32 bits is enough,
551    so there's no inefficiency here in returning a PyLong on 32-bit boxes
552    for everything written via TYPE_INT64 (i.e., if an int is written via
553    TYPE_INT64, it *needs* more than 32 bits).
554 */
555 static PyObject *
r_long64(RFILE * p)556 r_long64(RFILE *p)
557 {
558     long lo4 = r_long(p);
559     long hi4 = r_long(p);
560 #if SIZEOF_LONG > 4
561     long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
562     return PyInt_FromLong(x);
563 #else
564     unsigned char buf[8];
565     int one = 1;
566     int is_little_endian = (int)*(char*)&one;
567     if (is_little_endian) {
568         memcpy(buf, &lo4, 4);
569         memcpy(buf+4, &hi4, 4);
570     }
571     else {
572         memcpy(buf, &hi4, 4);
573         memcpy(buf+4, &lo4, 4);
574     }
575     return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
576 #endif
577 }
578 
579 static PyObject *
r_PyLong(RFILE * p)580 r_PyLong(RFILE *p)
581 {
582     PyLongObject *ob;
583     long n, size, i;
584     int j, md, shorts_in_top_digit;
585     digit d;
586 
587     n = r_long(p);
588     if (n == 0)
589         return (PyObject *)_PyLong_New(0);
590     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
591         PyErr_SetString(PyExc_ValueError,
592                        "bad marshal data (long size out of range)");
593         return NULL;
594     }
595 
596     size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO;
597     shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO;
598     ob = _PyLong_New(size);
599     if (ob == NULL)
600         return NULL;
601     Py_SIZE(ob) = n > 0 ? size : -size;
602 
603     for (i = 0; i < size-1; i++) {
604         d = 0;
605         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
606             md = r_short(p);
607             if (md < 0 || md > PyLong_MARSHAL_BASE)
608                 goto bad_digit;
609             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
610         }
611         ob->ob_digit[i] = d;
612     }
613     d = 0;
614     for (j=0; j < shorts_in_top_digit; j++) {
615         md = r_short(p);
616         if (md < 0 || md > PyLong_MARSHAL_BASE)
617             goto bad_digit;
618         /* topmost marshal digit should be nonzero */
619         if (md == 0 && j == shorts_in_top_digit - 1) {
620             Py_DECREF(ob);
621             PyErr_SetString(PyExc_ValueError,
622                 "bad marshal data (unnormalized long data)");
623             return NULL;
624         }
625         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
626     }
627     /* top digit should be nonzero, else the resulting PyLong won't be
628        normalized */
629     ob->ob_digit[size-1] = d;
630     return (PyObject *)ob;
631   bad_digit:
632     Py_DECREF(ob);
633     PyErr_SetString(PyExc_ValueError,
634                     "bad marshal data (digit out of range in long)");
635     return NULL;
636 }
637 
638 
639 static PyObject *
r_object(RFILE * p)640 r_object(RFILE *p)
641 {
642     /* NULL is a valid return value, it does not necessarily means that
643        an exception is set. */
644     PyObject *v, *v2;
645     long i, n;
646     int type = r_byte(p);
647     PyObject *retval;
648 
649     p->depth++;
650 
651     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
652         p->depth--;
653         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
654         return NULL;
655     }
656 
657     switch (type) {
658 
659     case EOF:
660         PyErr_SetString(PyExc_EOFError,
661                         "EOF read where object expected");
662         retval = NULL;
663         break;
664 
665     case TYPE_NULL:
666         retval = NULL;
667         break;
668 
669     case TYPE_NONE:
670         Py_INCREF(Py_None);
671         retval = Py_None;
672         break;
673 
674     case TYPE_STOPITER:
675         Py_INCREF(PyExc_StopIteration);
676         retval = PyExc_StopIteration;
677         break;
678 
679     case TYPE_ELLIPSIS:
680         Py_INCREF(Py_Ellipsis);
681         retval = Py_Ellipsis;
682         break;
683 
684     case TYPE_FALSE:
685         Py_INCREF(Py_False);
686         retval = Py_False;
687         break;
688 
689     case TYPE_TRUE:
690         Py_INCREF(Py_True);
691         retval = Py_True;
692         break;
693 
694     case TYPE_INT:
695         retval = PyInt_FromLong(r_long(p));
696         break;
697 
698     case TYPE_INT64:
699         retval = r_long64(p);
700         break;
701 
702     case TYPE_LONG:
703         retval = r_PyLong(p);
704         break;
705 
706     case TYPE_FLOAT:
707         {
708             char buf[256];
709             double dx;
710             n = r_byte(p);
711             if (n == EOF || r_string(buf, n, p) != n) {
712                 PyErr_SetString(PyExc_EOFError,
713                     "EOF read where object expected");
714                 retval = NULL;
715                 break;
716             }
717             buf[n] = '\0';
718             dx = PyOS_string_to_double(buf, NULL, NULL);
719             if (dx == -1.0 && PyErr_Occurred()) {
720                 retval = NULL;
721                 break;
722             }
723             retval = PyFloat_FromDouble(dx);
724             break;
725         }
726 
727     case TYPE_BINARY_FLOAT:
728         {
729             unsigned char buf[8];
730             double x;
731             if (r_string((char*)buf, 8, p) != 8) {
732                 PyErr_SetString(PyExc_EOFError,
733                     "EOF read where object expected");
734                 retval = NULL;
735                 break;
736             }
737             x = _PyFloat_Unpack8(buf, 1);
738             if (x == -1.0 && PyErr_Occurred()) {
739                 retval = NULL;
740                 break;
741             }
742             retval = PyFloat_FromDouble(x);
743             break;
744         }
745 
746 #ifndef WITHOUT_COMPLEX
747     case TYPE_COMPLEX:
748         {
749             char buf[256];
750             Py_complex c;
751             n = r_byte(p);
752             if (n == EOF || r_string(buf, n, p) != n) {
753                 PyErr_SetString(PyExc_EOFError,
754                     "EOF read where object expected");
755                 retval = NULL;
756                 break;
757             }
758             buf[n] = '\0';
759             c.real = PyOS_string_to_double(buf, NULL, NULL);
760             if (c.real == -1.0 && PyErr_Occurred()) {
761                 retval = NULL;
762                 break;
763             }
764             n = r_byte(p);
765             if (n == EOF || r_string(buf, n, p) != n) {
766                 PyErr_SetString(PyExc_EOFError,
767                     "EOF read where object expected");
768                 retval = NULL;
769                 break;
770             }
771             buf[n] = '\0';
772             c.imag = PyOS_string_to_double(buf, NULL, NULL);
773             if (c.imag == -1.0 && PyErr_Occurred()) {
774                 retval = NULL;
775                 break;
776             }
777             retval = PyComplex_FromCComplex(c);
778             break;
779         }
780 
781     case TYPE_BINARY_COMPLEX:
782         {
783             unsigned char buf[8];
784             Py_complex c;
785             if (r_string((char*)buf, 8, p) != 8) {
786                 PyErr_SetString(PyExc_EOFError,
787                     "EOF read where object expected");
788                 retval = NULL;
789                 break;
790             }
791             c.real = _PyFloat_Unpack8(buf, 1);
792             if (c.real == -1.0 && PyErr_Occurred()) {
793                 retval = NULL;
794                 break;
795             }
796             if (r_string((char*)buf, 8, p) != 8) {
797                 PyErr_SetString(PyExc_EOFError,
798                     "EOF read where object expected");
799                 retval = NULL;
800                 break;
801             }
802             c.imag = _PyFloat_Unpack8(buf, 1);
803             if (c.imag == -1.0 && PyErr_Occurred()) {
804                 retval = NULL;
805                 break;
806             }
807             retval = PyComplex_FromCComplex(c);
808             break;
809         }
810 #endif
811 
812     case TYPE_INTERNED:
813     case TYPE_STRING:
814         n = r_long(p);
815         if (n < 0 || n > SIZE32_MAX) {
816             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
817             retval = NULL;
818             break;
819         }
820         v = PyString_FromStringAndSize((char *)NULL, n);
821         if (v == NULL) {
822             retval = NULL;
823             break;
824         }
825         if (r_string(PyString_AS_STRING(v), n, p) != n) {
826             Py_DECREF(v);
827             PyErr_SetString(PyExc_EOFError,
828                             "EOF read where object expected");
829             retval = NULL;
830             break;
831         }
832         if (type == TYPE_INTERNED) {
833             PyString_InternInPlace(&v);
834             if (PyList_Append(p->strings, v) < 0) {
835                 retval = NULL;
836                 break;
837             }
838         }
839         retval = v;
840         break;
841 
842     case TYPE_STRINGREF:
843         n = r_long(p);
844         if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
845             PyErr_SetString(PyExc_ValueError, "bad marshal data (string ref out of range)");
846             retval = NULL;
847             break;
848         }
849         v = PyList_GET_ITEM(p->strings, n);
850         Py_INCREF(v);
851         retval = v;
852         break;
853 
854 #ifdef Py_USING_UNICODE
855     case TYPE_UNICODE:
856         {
857         char *buffer;
858 
859         n = r_long(p);
860         if (n < 0 || n > SIZE32_MAX) {
861             PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
862             retval = NULL;
863             break;
864         }
865         buffer = PyMem_NEW(char, n);
866         if (buffer == NULL) {
867             retval = PyErr_NoMemory();
868             break;
869         }
870         if (r_string(buffer, n, p) != n) {
871             PyMem_DEL(buffer);
872             PyErr_SetString(PyExc_EOFError,
873                 "EOF read where object expected");
874             retval = NULL;
875             break;
876         }
877         v = PyUnicode_DecodeUTF8(buffer, n, NULL);
878         PyMem_DEL(buffer);
879         retval = v;
880         break;
881         }
882 #endif
883 
884     case TYPE_TUPLE:
885         n = r_long(p);
886         if (n < 0 || n > SIZE32_MAX) {
887             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
888             retval = NULL;
889             break;
890         }
891         v = PyTuple_New(n);
892         if (v == NULL) {
893             retval = NULL;
894             break;
895         }
896         for (i = 0; i < n; i++) {
897             v2 = r_object(p);
898             if ( v2 == NULL ) {
899                 if (!PyErr_Occurred())
900                     PyErr_SetString(PyExc_TypeError,
901                         "NULL object in marshal data for tuple");
902                 Py_DECREF(v);
903                 v = NULL;
904                 break;
905             }
906             PyTuple_SET_ITEM(v, i, v2);
907         }
908         retval = v;
909         break;
910 
911     case TYPE_LIST:
912         n = r_long(p);
913         if (n < 0 || n > SIZE32_MAX) {
914             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
915             retval = NULL;
916             break;
917         }
918         v = PyList_New(n);
919         if (v == NULL) {
920             retval = NULL;
921             break;
922         }
923         for (i = 0; i < n; i++) {
924             v2 = r_object(p);
925             if ( v2 == NULL ) {
926                 if (!PyErr_Occurred())
927                     PyErr_SetString(PyExc_TypeError,
928                         "NULL object in marshal data for list");
929                 Py_DECREF(v);
930                 v = NULL;
931                 break;
932             }
933             PyList_SET_ITEM(v, i, v2);
934         }
935         retval = v;
936         break;
937 
938     case TYPE_DICT:
939         v = PyDict_New();
940         if (v == NULL) {
941             retval = NULL;
942             break;
943         }
944         for (;;) {
945             PyObject *key, *val;
946             key = r_object(p);
947             if (key == NULL)
948                 break;
949             val = r_object(p);
950             if (val != NULL)
951                 PyDict_SetItem(v, key, val);
952             Py_DECREF(key);
953             Py_XDECREF(val);
954         }
955         if (PyErr_Occurred()) {
956             Py_DECREF(v);
957             v = NULL;
958         }
959         retval = v;
960         break;
961 
962     case TYPE_SET:
963     case TYPE_FROZENSET:
964         n = r_long(p);
965         if (n < 0 || n > SIZE32_MAX) {
966             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
967             retval = NULL;
968             break;
969         }
970         v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
971         if (v == NULL) {
972             retval = NULL;
973             break;
974         }
975         for (i = 0; i < n; i++) {
976             v2 = r_object(p);
977             if ( v2 == NULL ) {
978                 if (!PyErr_Occurred())
979                     PyErr_SetString(PyExc_TypeError,
980                         "NULL object in marshal data for set");
981                 Py_DECREF(v);
982                 v = NULL;
983                 break;
984             }
985             if (PySet_Add(v, v2) == -1) {
986                 Py_DECREF(v);
987                 Py_DECREF(v2);
988                 v = NULL;
989                 break;
990             }
991             Py_DECREF(v2);
992         }
993         retval = v;
994         break;
995 
996     case TYPE_CODE:
997         if (PyEval_GetRestricted()) {
998             PyErr_SetString(PyExc_RuntimeError,
999                 "cannot unmarshal code objects in "
1000                 "restricted execution mode");
1001             retval = NULL;
1002             break;
1003         }
1004         else {
1005             int argcount;
1006             int nlocals;
1007             int stacksize;
1008             int flags;
1009             PyObject *code = NULL;
1010             PyObject *consts = NULL;
1011             PyObject *names = NULL;
1012             PyObject *varnames = NULL;
1013             PyObject *freevars = NULL;
1014             PyObject *cellvars = NULL;
1015             PyObject *filename = NULL;
1016             PyObject *name = NULL;
1017             int firstlineno;
1018             PyObject *lnotab = NULL;
1019 
1020             v = NULL;
1021 
1022             /* XXX ignore long->int overflows for now */
1023             argcount = (int)r_long(p);
1024             nlocals = (int)r_long(p);
1025             stacksize = (int)r_long(p);
1026             flags = (int)r_long(p);
1027             code = r_object(p);
1028             if (code == NULL)
1029                 goto code_error;
1030             consts = r_object(p);
1031             if (consts == NULL)
1032                 goto code_error;
1033             names = r_object(p);
1034             if (names == NULL)
1035                 goto code_error;
1036             varnames = r_object(p);
1037             if (varnames == NULL)
1038                 goto code_error;
1039             freevars = r_object(p);
1040             if (freevars == NULL)
1041                 goto code_error;
1042             cellvars = r_object(p);
1043             if (cellvars == NULL)
1044                 goto code_error;
1045             filename = r_object(p);
1046             if (filename == NULL)
1047                 goto code_error;
1048             name = r_object(p);
1049             if (name == NULL)
1050                 goto code_error;
1051             firstlineno = (int)r_long(p);
1052             lnotab = r_object(p);
1053             if (lnotab == NULL)
1054                 goto code_error;
1055 
1056             v = (PyObject *) PyCode_New(
1057                             argcount, nlocals, stacksize, flags,
1058                             code, consts, names, varnames,
1059                             freevars, cellvars, filename, name,
1060                             firstlineno, lnotab);
1061 
1062           code_error:
1063             Py_XDECREF(code);
1064             Py_XDECREF(consts);
1065             Py_XDECREF(names);
1066             Py_XDECREF(varnames);
1067             Py_XDECREF(freevars);
1068             Py_XDECREF(cellvars);
1069             Py_XDECREF(filename);
1070             Py_XDECREF(name);
1071             Py_XDECREF(lnotab);
1072 
1073         }
1074         retval = v;
1075         break;
1076 
1077     default:
1078         /* Bogus data got written, which isn't ideal.
1079            This will let you keep working and recover. */
1080         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1081         retval = NULL;
1082         break;
1083 
1084     }
1085     p->depth--;
1086     return retval;
1087 }
1088 
1089 static PyObject *
read_object(RFILE * p)1090 read_object(RFILE *p)
1091 {
1092     PyObject *v;
1093     if (PyErr_Occurred()) {
1094         fprintf(stderr, "XXX readobject called with exception set\n");
1095         return NULL;
1096     }
1097     v = r_object(p);
1098     if (v == NULL && !PyErr_Occurred())
1099         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1100     return v;
1101 }
1102 
1103 int
PyMarshal_ReadShortFromFile(FILE * fp)1104 PyMarshal_ReadShortFromFile(FILE *fp)
1105 {
1106     RFILE rf;
1107     assert(fp);
1108     rf.fp = fp;
1109     rf.strings = NULL;
1110     rf.end = rf.ptr = NULL;
1111     return r_short(&rf);
1112 }
1113 
1114 long
PyMarshal_ReadLongFromFile(FILE * fp)1115 PyMarshal_ReadLongFromFile(FILE *fp)
1116 {
1117     RFILE rf;
1118     rf.fp = fp;
1119     rf.strings = NULL;
1120     rf.ptr = rf.end = NULL;
1121     return r_long(&rf);
1122 }
1123 
#ifdef HAVE_FSTAT
/* Report the size in bytes of the file behind fp, as given by fstat() on
 * its descriptor.  Returns -1 when fstat() fails, i.e. the size is unknown.
 */
static off_t
getfilesize(FILE *fp)
{
    struct stat info;

    return (fstat(fileno(fp), &info) == 0) ? info.st_size : -1;
}
#endif
1136 
1137 /* If we can get the size of the file up-front, and it's reasonably small,
1138  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
1139  * than reading a byte at a time from file; speeds .pyc imports.
1140  * CAUTION:  since this may read the entire remainder of the file, don't
1141  * call it unless you know you're done with the file.
1142  */
1143 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1144 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1145 {
1146 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1147 #define REASONABLE_FILE_LIMIT (1L << 18)
1148 #ifdef HAVE_FSTAT
1149     off_t filesize;
1150     filesize = getfilesize(fp);
1151     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1152         char* pBuf = (char *)PyMem_MALLOC(filesize);
1153         if (pBuf != NULL) {
1154             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1155             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1156             PyMem_FREE(pBuf);
1157             return v;
1158         }
1159 
1160     }
1161 #endif
1162     /* We don't have fstat, or we do but the file is larger than
1163      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1164      */
1165     return PyMarshal_ReadObjectFromFile(fp);
1166 
1167 #undef REASONABLE_FILE_LIMIT
1168 }
1169 
1170 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1171 PyMarshal_ReadObjectFromFile(FILE *fp)
1172 {
1173     RFILE rf;
1174     PyObject *result;
1175     rf.fp = fp;
1176     rf.strings = PyList_New(0);
1177     rf.depth = 0;
1178     rf.ptr = rf.end = NULL;
1179     result = r_object(&rf);
1180     Py_DECREF(rf.strings);
1181     return result;
1182 }
1183 
1184 PyObject *
PyMarshal_ReadObjectFromString(char * str,Py_ssize_t len)1185 PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
1186 {
1187     RFILE rf;
1188     PyObject *result;
1189     rf.fp = NULL;
1190     rf.ptr = str;
1191     rf.end = str + len;
1192     rf.strings = PyList_New(0);
1193     rf.depth = 0;
1194     result = r_object(&rf);
1195     Py_DECREF(rf.strings);
1196     return result;
1197 }
1198 
1199 static void
set_error(int error)1200 set_error(int error)
1201 {
1202     switch (error) {
1203     case WFERR_NOMEMORY:
1204         PyErr_NoMemory();
1205         break;
1206     case WFERR_UNMARSHALLABLE:
1207         PyErr_SetString(PyExc_ValueError, "unmarshallable object");
1208         break;
1209     case WFERR_NESTEDTOODEEP:
1210     default:
1211         PyErr_SetString(PyExc_ValueError,
1212             "object too deeply nested to marshal");
1213         break;
1214     }
1215 }
1216 
1217 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1218 PyMarshal_WriteObjectToString(PyObject *x, int version)
1219 {
1220     WFILE wf;
1221     wf.fp = NULL;
1222     wf.str = PyString_FromStringAndSize((char *)NULL, 50);
1223     if (wf.str == NULL)
1224         return NULL;
1225     wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str);
1226     wf.end = wf.ptr + PyString_Size(wf.str);
1227     wf.error = WFERR_OK;
1228     wf.depth = 0;
1229     wf.version = version;
1230     wf.strings = (version > 0) ? PyDict_New() : NULL;
1231     w_object(x, &wf);
1232     Py_XDECREF(wf.strings);
1233     if (wf.str != NULL) {
1234         char *base = PyString_AS_STRING((PyStringObject *)wf.str);
1235         if (wf.ptr - base > PY_SSIZE_T_MAX) {
1236             Py_DECREF(wf.str);
1237             PyErr_SetString(PyExc_OverflowError,
1238                             "too much marshall data for a string");
1239             return NULL;
1240         }
1241         if (_PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)))
1242             return NULL;
1243     }
1244     if (wf.error != WFERR_OK) {
1245         Py_XDECREF(wf.str);
1246         set_error(wf.error);
1247         return NULL;
1248     }
1249     return wf.str;
1250 }
1251 
1252 /* And an interface for Python programs... */
1253 
1254 static PyObject *
marshal_dump(PyObject * self,PyObject * args)1255 marshal_dump(PyObject *self, PyObject *args)
1256 {
1257     WFILE wf;
1258     PyObject *x;
1259     PyObject *f;
1260     int version = Py_MARSHAL_VERSION;
1261     if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1262         return NULL;
1263     if (!PyFile_Check(f)) {
1264         PyErr_SetString(PyExc_TypeError,
1265                         "marshal.dump() 2nd arg must be file");
1266         return NULL;
1267     }
1268     wf.fp = PyFile_AsFile(f);
1269     wf.str = NULL;
1270     wf.ptr = wf.end = NULL;
1271     wf.error = WFERR_OK;
1272     wf.depth = 0;
1273     wf.strings = (version > 0) ? PyDict_New() : 0;
1274     wf.version = version;
1275     w_object(x, &wf);
1276     Py_XDECREF(wf.strings);
1277     if (wf.error != WFERR_OK) {
1278         set_error(wf.error);
1279         return NULL;
1280     }
1281     Py_INCREF(Py_None);
1282     return Py_None;
1283 }
1284 
1285 PyDoc_STRVAR(dump_doc,
1286 "dump(value, file[, version])\n\
1287 \n\
1288 Write the value on the open file. The value must be a supported type.\n\
1289 The file must be an open file object such as sys.stdout or returned by\n\
1290 open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
1291 \n\
1292 If the value has (or contains an object that has) an unsupported type, a\n\
1293 ValueError exception is raised — but garbage data will also be written\n\
1294 to the file. The object will not be properly read back by load()\n\
1295 \n\
1296 New in version 2.4: The version argument indicates the data format that\n\
1297 dump should use.");
1298 
1299 static PyObject *
marshal_load(PyObject * self,PyObject * f)1300 marshal_load(PyObject *self, PyObject *f)
1301 {
1302     RFILE rf;
1303     PyObject *result;
1304     if (!PyFile_Check(f)) {
1305         PyErr_SetString(PyExc_TypeError,
1306                         "marshal.load() arg must be file");
1307         return NULL;
1308     }
1309     rf.fp = PyFile_AsFile(f);
1310     rf.strings = PyList_New(0);
1311     rf.depth = 0;
1312     result = read_object(&rf);
1313     Py_DECREF(rf.strings);
1314     return result;
1315 }
1316 
1317 PyDoc_STRVAR(load_doc,
1318 "load(file)\n\
1319 \n\
1320 Read one value from the open file and return it. If no valid value is\n\
1321 read (e.g. because the data has a different Python version’s\n\
1322 incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
1323 The file must be an open file object opened in binary mode ('rb' or\n\
1324 'r+b').\n\
1325 \n\
1326 Note: If an object containing an unsupported type was marshalled with\n\
1327 dump(), load() will substitute None for the unmarshallable type.");
1328 
1329 
1330 static PyObject *
marshal_dumps(PyObject * self,PyObject * args)1331 marshal_dumps(PyObject *self, PyObject *args)
1332 {
1333     PyObject *x;
1334     int version = Py_MARSHAL_VERSION;
1335     if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1336         return NULL;
1337     return PyMarshal_WriteObjectToString(x, version);
1338 }
1339 
1340 PyDoc_STRVAR(dumps_doc,
1341 "dumps(value[, version])\n\
1342 \n\
1343 Return the string that would be written to a file by dump(value, file).\n\
1344 The value must be a supported type. Raise a ValueError exception if\n\
1345 value has (or contains an object that has) an unsupported type.\n\
1346 \n\
1347 New in version 2.4: The version argument indicates the data format that\n\
1348 dumps should use.");
1349 
1350 
1351 static PyObject *
marshal_loads(PyObject * self,PyObject * args)1352 marshal_loads(PyObject *self, PyObject *args)
1353 {
1354     RFILE rf;
1355     char *s;
1356     Py_ssize_t n;
1357     PyObject* result;
1358     if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
1359         return NULL;
1360     rf.fp = NULL;
1361     rf.ptr = s;
1362     rf.end = s + n;
1363     rf.strings = PyList_New(0);
1364     rf.depth = 0;
1365     result = read_object(&rf);
1366     Py_DECREF(rf.strings);
1367     return result;
1368 }
1369 
1370 PyDoc_STRVAR(loads_doc,
1371 "loads(string)\n\
1372 \n\
1373 Convert the string to a value. If no valid value is found, raise\n\
1374 EOFError, ValueError or TypeError. Extra characters in the string are\n\
1375 ignored.");
1376 
1377 static PyMethodDef marshal_methods[] = {
1378     {"dump",            marshal_dump,   METH_VARARGS,   dump_doc},
1379     {"load",            marshal_load,   METH_O,         load_doc},
1380     {"dumps",           marshal_dumps,  METH_VARARGS,   dumps_doc},
1381     {"loads",           marshal_loads,  METH_VARARGS,   loads_doc},
1382     {NULL,              NULL}           /* sentinel */
1383 };
1384 
1385 PyDoc_STRVAR(marshal_doc,
1386 "This module contains functions that can read and write Python values in\n\
1387 a binary format. The format is specific to Python, but independent of\n\
1388 machine architecture issues.\n\
1389 \n\
1390 Not all Python object types are supported; in general, only objects\n\
1391 whose value is independent from a particular invocation of Python can be\n\
1392 written and read by this module. The following types are supported:\n\
1393 None, integers, long integers, floating point numbers, strings, Unicode\n\
1394 objects, tuples, lists, sets, dictionaries, and code objects, where it\n\
1395 should be understood that tuples, lists and dictionaries are only\n\
1396 supported as long as the values contained therein are themselves\n\
1397 supported; and recursive lists and dictionaries should not be written\n\
1398 (they will cause infinite loops).\n\
1399 \n\
1400 Variables:\n\
1401 \n\
1402 version -- indicates the format that the module uses. Version 0 is the\n\
1403     historical format, version 1 (added in Python 2.4) shares interned\n\
1404     strings and version 2 (added in Python 2.5) uses a binary format for\n\
1405     floating point numbers. (New in version 2.4)\n\
1406 \n\
1407 Functions:\n\
1408 \n\
1409 dump() -- write value to a file\n\
1410 load() -- read value from a file\n\
1411 dumps() -- write value to a string\n\
1412 loads() -- read value from a string");
1413 
1414 
1415 PyMODINIT_FUNC
PyMarshal_Init(void)1416 PyMarshal_Init(void)
1417 {
1418     PyObject *mod = Py_InitModule3("marshal", marshal_methods,
1419         marshal_doc);
1420     if (mod == NULL)
1421         return;
1422     PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1423 }
1424