1
2 /* Write Python objects to files and read them back.
3 This is primarily intended for writing and reading compiled Python code,
4 even though dicts, lists, sets and frozensets, not commonly seen in
5 code objects, are supported.
6 Version 3 of this protocol properly supports circular links
7 and sharing. */
8
9 #define PY_SSIZE_T_CLEAN
10
11 #include "Python.h"
12 #include "longintrepr.h"
13 #include "code.h"
14 #include "marshal.h"
15 #include "../Modules/hashtable.h"
16
17 /*[clinic input]
18 module marshal
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c982b7930dee17db]*/
21
22 #include "clinic/marshal.c.h"
23
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 *
 * BUG: https://bugs.python.org/issue33720
 * On Windows PGO builds, the r_object function overallocates its stack and
 * can cause a stack overflow.  We reduce the maximum depth for all Windows
 * releases to protect against this.
 *
 * NOTE(review): presumably the limit was once reduced only for Windows debug
 * builds, i.e. under "#if defined(MS_WINDOWS) && defined(_DEBUG)" -- confirm
 * against the history of the bug above.
 */
#if defined(MS_WINDOWS)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
40
/* Type codes.  Each marshalled object starts with one byte holding one of
   these codes, optionally OR-ed with FLAG_REF. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
/* TYPE_INT64 is not generated anymore.
   Supported for backward compatibility only. */
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Codes that the writer only emits when the marshal version is >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
79
/* State of one marshal write operation.  Output goes either to `fp` through
   a fixed buffer, or (when `fp` is NULL) into the growing bytes object
   `str`. */
typedef struct {
    FILE *fp;                   /* destination file, or NULL */
    int error;  /* see WFERR_* values */
    int depth;                  /* current w_object() nesting level */
    PyObject *str;              /* bytes object resized by w_reserve() when fp is NULL */
    char *ptr;                  /* next byte to write; NULL after a failed resize */
    char *end;                  /* one past the last usable byte of the buffer */
    char *buf;                  /* start of the buffer */
    _Py_hashtable_t *hashtable; /* object -> reference index; created for version >= 3 */
    int version;                /* marshal format version being written */
} WFILE;
91
/* Append the single byte `c` to the output of WFILE `p`.  When the buffer is
   full, w_reserve() is asked for one more byte first; if that fails the byte
   is dropped.  `p` is evaluated more than once. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
96
97 static void
w_flush(WFILE * p)98 w_flush(WFILE *p)
99 {
100 assert(p->fp != NULL);
101 fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
102 p->ptr = p->buf;
103 }
104
105 static int
w_reserve(WFILE * p,Py_ssize_t needed)106 w_reserve(WFILE *p, Py_ssize_t needed)
107 {
108 Py_ssize_t pos, size, delta;
109 if (p->ptr == NULL)
110 return 0; /* An error already occurred */
111 if (p->fp != NULL) {
112 w_flush(p);
113 return needed <= p->end - p->ptr;
114 }
115 assert(p->str != NULL);
116 pos = p->ptr - p->buf;
117 size = PyBytes_Size(p->str);
118 if (size > 16*1024*1024)
119 delta = (size >> 3); /* 12.5% overallocation */
120 else
121 delta = size + 1024;
122 delta = Py_MAX(delta, needed);
123 if (delta > PY_SSIZE_T_MAX - size) {
124 p->error = WFERR_NOMEMORY;
125 return 0;
126 }
127 size += delta;
128 if (_PyBytes_Resize(&p->str, size) != 0) {
129 p->ptr = p->buf = p->end = NULL;
130 return 0;
131 }
132 else {
133 p->buf = PyBytes_AS_STRING(p->str);
134 p->ptr = p->buf + pos;
135 p->end = p->buf + size;
136 return 1;
137 }
138 }
139
140 static void
w_string(const char * s,Py_ssize_t n,WFILE * p)141 w_string(const char *s, Py_ssize_t n, WFILE *p)
142 {
143 Py_ssize_t m;
144 if (!n || p->ptr == NULL)
145 return;
146 m = p->end - p->ptr;
147 if (p->fp != NULL) {
148 if (n <= m) {
149 memcpy(p->ptr, s, n);
150 p->ptr += n;
151 }
152 else {
153 w_flush(p);
154 fwrite(s, 1, n, p->fp);
155 }
156 }
157 else {
158 if (n <= m || w_reserve(p, n - m)) {
159 memcpy(p->ptr, s, n);
160 p->ptr += n;
161 }
162 }
163 }
164
165 static void
w_short(int x,WFILE * p)166 w_short(int x, WFILE *p)
167 {
168 w_byte((char)( x & 0xff), p);
169 w_byte((char)((x>> 8) & 0xff), p);
170 }
171
172 static void
w_long(long x,WFILE * p)173 w_long(long x, WFILE *p)
174 {
175 w_byte((char)( x & 0xff), p);
176 w_byte((char)((x>> 8) & 0xff), p);
177 w_byte((char)((x>>16) & 0xff), p);
178 w_byte((char)((x>>24) & 0xff), p);
179 }
180
/* Largest size or count that is written into a 4-byte size field. */
#define SIZE32_MAX  0x7FFFFFFF

/* W_SIZE(n, p): write the size `n` with w_long().
   Where size_t is wider than 4 bytes, a value above SIZE32_MAX is not
   written; instead the macro decrements p->depth, records
   WFERR_UNMARSHALLABLE and *returns from the enclosing function*, so it can
   only be used inside functions returning void. */
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p)  do {                     \
        if ((n) > SIZE32_MAX) {                 \
            (p)->depth--;                       \
            (p)->error = WFERR_UNMARSHALLABLE;  \
            return;                             \
        }                                       \
        w_long((long)(n), p);                   \
    } while(0)
#else
# define W_SIZE  w_long
#endif
195
/* Write a length-prefixed byte string: the size `n` via W_SIZE, followed by
   the `n` bytes at `s`. */
static void
w_pstring(const char *s, Py_ssize_t n, WFILE *p)
{
    /* W_SIZE may return from this function (with p->error set) when n is
       too large for the size field; nothing further is written then. */
    W_SIZE(n, p);
    w_string(s, n, p);
}
202
203 static void
w_short_pstring(const char * s,Py_ssize_t n,WFILE * p)204 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
205 {
206 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
207 w_string(s, n, p);
208 }
209
/* We assume that Python ints are stored internally in base some power of
   2**15; for the sake of portability we'll always read and write them in base
   exactly 2**15. */

/* Number of value bits carried by each marshalled digit. */
#define PyLong_MARSHAL_SHIFT 15
/* (short)1 is promoted to int before the shift, so this is 1 << 15. */
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* How many marshalled digits make up one internal digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* Write the type code `t` OR-ed with `flag`.  `flag` is not a macro
   parameter: a variable of that name must be in scope where the macro is
   expanded. */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
225
226 static void
w_PyLong(const PyLongObject * ob,char flag,WFILE * p)227 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
228 {
229 Py_ssize_t i, j, n, l;
230 digit d;
231
232 W_TYPE(TYPE_LONG, p);
233 if (Py_SIZE(ob) == 0) {
234 w_long((long)0, p);
235 return;
236 }
237
238 /* set l to number of base PyLong_MARSHAL_BASE digits */
239 n = Py_ABS(Py_SIZE(ob));
240 l = (n-1) * PyLong_MARSHAL_RATIO;
241 d = ob->ob_digit[n-1];
242 assert(d != 0); /* a PyLong is always normalized */
243 do {
244 d >>= PyLong_MARSHAL_SHIFT;
245 l++;
246 } while (d != 0);
247 if (l > SIZE32_MAX) {
248 p->depth--;
249 p->error = WFERR_UNMARSHALLABLE;
250 return;
251 }
252 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
253
254 for (i=0; i < n-1; i++) {
255 d = ob->ob_digit[i];
256 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
257 w_short(d & PyLong_MARSHAL_MASK, p);
258 d >>= PyLong_MARSHAL_SHIFT;
259 }
260 assert (d == 0);
261 }
262 d = ob->ob_digit[n-1];
263 do {
264 w_short(d & PyLong_MARSHAL_MASK, p);
265 d >>= PyLong_MARSHAL_SHIFT;
266 } while (d != 0);
267 }
268
269 static void
w_float_bin(double v,WFILE * p)270 w_float_bin(double v, WFILE *p)
271 {
272 unsigned char buf[8];
273 if (_PyFloat_Pack8(v, buf, 1) < 0) {
274 p->error = WFERR_UNMARSHALLABLE;
275 return;
276 }
277 w_string((const char *)buf, 8, p);
278 }
279
280 static void
w_float_str(double v,WFILE * p)281 w_float_str(double v, WFILE *p)
282 {
283 int n;
284 char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
285 if (!buf) {
286 p->error = WFERR_NOMEMORY;
287 return;
288 }
289 n = (int)strlen(buf);
290 w_byte(n, p);
291 w_string(buf, n, p);
292 PyMem_Free(buf);
293 }
294
295 static int
w_ref(PyObject * v,char * flag,WFILE * p)296 w_ref(PyObject *v, char *flag, WFILE *p)
297 {
298 _Py_hashtable_entry_t *entry;
299 int w;
300
301 if (p->version < 3 || p->hashtable == NULL)
302 return 0; /* not writing object references */
303
304 /* if it has only one reference, it definitely isn't shared */
305 if (Py_REFCNT(v) == 1)
306 return 0;
307
308 entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
309 if (entry != NULL) {
310 /* write the reference index to the stream */
311 _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
312 /* we don't store "long" indices in the dict */
313 assert(0 <= w && w <= 0x7fffffff);
314 w_byte(TYPE_REF, p);
315 w_long(w, p);
316 return 1;
317 } else {
318 size_t s = p->hashtable->entries;
319 /* we don't support long indices */
320 if (s >= 0x7fffffff) {
321 PyErr_SetString(PyExc_ValueError, "too many objects");
322 goto err;
323 }
324 w = (int)s;
325 Py_INCREF(v);
326 if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
327 Py_DECREF(v);
328 goto err;
329 }
330 *flag |= FLAG_REF;
331 return 0;
332 }
333 err:
334 p->error = WFERR_UNMARSHALLABLE;
335 return 1;
336 }
337
338 static void
339 w_complex_object(PyObject *v, char flag, WFILE *p);
340
341 static void
w_object(PyObject * v,WFILE * p)342 w_object(PyObject *v, WFILE *p)
343 {
344 char flag = '\0';
345
346 p->depth++;
347
348 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
349 p->error = WFERR_NESTEDTOODEEP;
350 }
351 else if (v == NULL) {
352 w_byte(TYPE_NULL, p);
353 }
354 else if (v == Py_None) {
355 w_byte(TYPE_NONE, p);
356 }
357 else if (v == PyExc_StopIteration) {
358 w_byte(TYPE_STOPITER, p);
359 }
360 else if (v == Py_Ellipsis) {
361 w_byte(TYPE_ELLIPSIS, p);
362 }
363 else if (v == Py_False) {
364 w_byte(TYPE_FALSE, p);
365 }
366 else if (v == Py_True) {
367 w_byte(TYPE_TRUE, p);
368 }
369 else if (!w_ref(v, &flag, p))
370 w_complex_object(v, flag, p);
371
372 p->depth--;
373 }
374
375 static void
w_complex_object(PyObject * v,char flag,WFILE * p)376 w_complex_object(PyObject *v, char flag, WFILE *p)
377 {
378 Py_ssize_t i, n;
379
380 if (PyLong_CheckExact(v)) {
381 long x = PyLong_AsLong(v);
382 if ((x == -1) && PyErr_Occurred()) {
383 PyLongObject *ob = (PyLongObject *)v;
384 PyErr_Clear();
385 w_PyLong(ob, flag, p);
386 }
387 else {
388 #if SIZEOF_LONG > 4
389 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
390 if (y && y != -1) {
391 /* Too large for TYPE_INT */
392 w_PyLong((PyLongObject*)v, flag, p);
393 }
394 else
395 #endif
396 {
397 W_TYPE(TYPE_INT, p);
398 w_long(x, p);
399 }
400 }
401 }
402 else if (PyFloat_CheckExact(v)) {
403 if (p->version > 1) {
404 W_TYPE(TYPE_BINARY_FLOAT, p);
405 w_float_bin(PyFloat_AS_DOUBLE(v), p);
406 }
407 else {
408 W_TYPE(TYPE_FLOAT, p);
409 w_float_str(PyFloat_AS_DOUBLE(v), p);
410 }
411 }
412 else if (PyComplex_CheckExact(v)) {
413 if (p->version > 1) {
414 W_TYPE(TYPE_BINARY_COMPLEX, p);
415 w_float_bin(PyComplex_RealAsDouble(v), p);
416 w_float_bin(PyComplex_ImagAsDouble(v), p);
417 }
418 else {
419 W_TYPE(TYPE_COMPLEX, p);
420 w_float_str(PyComplex_RealAsDouble(v), p);
421 w_float_str(PyComplex_ImagAsDouble(v), p);
422 }
423 }
424 else if (PyBytes_CheckExact(v)) {
425 W_TYPE(TYPE_STRING, p);
426 w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
427 }
428 else if (PyUnicode_CheckExact(v)) {
429 if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
430 int is_short = PyUnicode_GET_LENGTH(v) < 256;
431 if (is_short) {
432 if (PyUnicode_CHECK_INTERNED(v))
433 W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
434 else
435 W_TYPE(TYPE_SHORT_ASCII, p);
436 w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
437 PyUnicode_GET_LENGTH(v), p);
438 }
439 else {
440 if (PyUnicode_CHECK_INTERNED(v))
441 W_TYPE(TYPE_ASCII_INTERNED, p);
442 else
443 W_TYPE(TYPE_ASCII, p);
444 w_pstring((char *) PyUnicode_1BYTE_DATA(v),
445 PyUnicode_GET_LENGTH(v), p);
446 }
447 }
448 else {
449 PyObject *utf8;
450 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
451 if (utf8 == NULL) {
452 p->depth--;
453 p->error = WFERR_UNMARSHALLABLE;
454 return;
455 }
456 if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
457 W_TYPE(TYPE_INTERNED, p);
458 else
459 W_TYPE(TYPE_UNICODE, p);
460 w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
461 Py_DECREF(utf8);
462 }
463 }
464 else if (PyTuple_CheckExact(v)) {
465 n = PyTuple_Size(v);
466 if (p->version >= 4 && n < 256) {
467 W_TYPE(TYPE_SMALL_TUPLE, p);
468 w_byte((unsigned char)n, p);
469 }
470 else {
471 W_TYPE(TYPE_TUPLE, p);
472 W_SIZE(n, p);
473 }
474 for (i = 0; i < n; i++) {
475 w_object(PyTuple_GET_ITEM(v, i), p);
476 }
477 }
478 else if (PyList_CheckExact(v)) {
479 W_TYPE(TYPE_LIST, p);
480 n = PyList_GET_SIZE(v);
481 W_SIZE(n, p);
482 for (i = 0; i < n; i++) {
483 w_object(PyList_GET_ITEM(v, i), p);
484 }
485 }
486 else if (PyDict_CheckExact(v)) {
487 Py_ssize_t pos;
488 PyObject *key, *value;
489 W_TYPE(TYPE_DICT, p);
490 /* This one is NULL object terminated! */
491 pos = 0;
492 while (PyDict_Next(v, &pos, &key, &value)) {
493 w_object(key, p);
494 w_object(value, p);
495 }
496 w_object((PyObject *)NULL, p);
497 }
498 else if (PyAnySet_CheckExact(v)) {
499 PyObject *value, *it;
500
501 if (PyObject_TypeCheck(v, &PySet_Type))
502 W_TYPE(TYPE_SET, p);
503 else
504 W_TYPE(TYPE_FROZENSET, p);
505 n = PyObject_Size(v);
506 if (n == -1) {
507 p->depth--;
508 p->error = WFERR_UNMARSHALLABLE;
509 return;
510 }
511 W_SIZE(n, p);
512 it = PyObject_GetIter(v);
513 if (it == NULL) {
514 p->depth--;
515 p->error = WFERR_UNMARSHALLABLE;
516 return;
517 }
518 while ((value = PyIter_Next(it)) != NULL) {
519 w_object(value, p);
520 Py_DECREF(value);
521 }
522 Py_DECREF(it);
523 if (PyErr_Occurred()) {
524 p->depth--;
525 p->error = WFERR_UNMARSHALLABLE;
526 return;
527 }
528 }
529 else if (PyCode_Check(v)) {
530 PyCodeObject *co = (PyCodeObject *)v;
531 W_TYPE(TYPE_CODE, p);
532 w_long(co->co_argcount, p);
533 w_long(co->co_posonlyargcount, p);
534 w_long(co->co_kwonlyargcount, p);
535 w_long(co->co_nlocals, p);
536 w_long(co->co_stacksize, p);
537 w_long(co->co_flags, p);
538 w_object(co->co_code, p);
539 w_object(co->co_consts, p);
540 w_object(co->co_names, p);
541 w_object(co->co_varnames, p);
542 w_object(co->co_freevars, p);
543 w_object(co->co_cellvars, p);
544 w_object(co->co_filename, p);
545 w_object(co->co_name, p);
546 w_long(co->co_firstlineno, p);
547 w_object(co->co_lnotab, p);
548 }
549 else if (PyObject_CheckBuffer(v)) {
550 /* Write unknown bytes-like objects as a bytes object */
551 Py_buffer view;
552 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
553 w_byte(TYPE_UNKNOWN, p);
554 p->depth--;
555 p->error = WFERR_UNMARSHALLABLE;
556 return;
557 }
558 W_TYPE(TYPE_STRING, p);
559 w_pstring(view.buf, view.len, p);
560 PyBuffer_Release(&view);
561 }
562 else {
563 W_TYPE(TYPE_UNKNOWN, p);
564 p->error = WFERR_UNMARSHALLABLE;
565 }
566 }
567
568 static int
w_init_refs(WFILE * wf,int version)569 w_init_refs(WFILE *wf, int version)
570 {
571 if (version >= 3) {
572 wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
573 _Py_hashtable_hash_ptr,
574 _Py_hashtable_compare_direct);
575 if (wf->hashtable == NULL) {
576 PyErr_NoMemory();
577 return -1;
578 }
579 }
580 return 0;
581 }
582
583 static int
w_decref_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry,void * Py_UNUSED (data))584 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
585 void *Py_UNUSED(data))
586 {
587 PyObject *entry_key;
588
589 _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key);
590 Py_XDECREF(entry_key);
591 return 0;
592 }
593
594 static void
w_clear_refs(WFILE * wf)595 w_clear_refs(WFILE *wf)
596 {
597 if (wf->hashtable != NULL) {
598 _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
599 _Py_hashtable_destroy(wf->hashtable);
600 }
601 }
602
603 /* version currently has no effect for writing ints. */
604 void
PyMarshal_WriteLongToFile(long x,FILE * fp,int version)605 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
606 {
607 char buf[4];
608 WFILE wf;
609 memset(&wf, 0, sizeof(wf));
610 wf.fp = fp;
611 wf.ptr = wf.buf = buf;
612 wf.end = wf.ptr + sizeof(buf);
613 wf.error = WFERR_OK;
614 wf.version = version;
615 w_long(x, &wf);
616 w_flush(&wf);
617 }
618
619 void
PyMarshal_WriteObjectToFile(PyObject * x,FILE * fp,int version)620 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
621 {
622 char buf[BUFSIZ];
623 WFILE wf;
624 memset(&wf, 0, sizeof(wf));
625 wf.fp = fp;
626 wf.ptr = wf.buf = buf;
627 wf.end = wf.ptr + sizeof(buf);
628 wf.error = WFERR_OK;
629 wf.version = version;
630 if (w_init_refs(&wf, version))
631 return; /* caller mush check PyErr_Occurred() */
632 w_object(x, &wf);
633 w_clear_refs(&wf);
634 w_flush(&wf);
635 }
636
/* State of one marshal read operation.  When `ptr` is non-NULL, data is
   taken directly from the memory range [ptr, end); otherwise it is read
   from `fp`, or from `readable` when that is set, into the scratch buffer
   `buf`. */
typedef struct {
    FILE *fp;               /* source file, used when `readable` is NULL */
    int depth;              /* current r_object() nesting level */
    PyObject *readable;  /* Stream-like object being read from */
    char *ptr;              /* next byte of in-memory data, or NULL */
    char *end;              /* one past the last byte of in-memory data */
    char *buf;              /* scratch buffer managed by r_string() */
    Py_ssize_t buf_size;    /* number of bytes allocated for `buf` */
    PyObject *refs;  /* a list */
} RFILE;
647
648 static const char *
r_string(Py_ssize_t n,RFILE * p)649 r_string(Py_ssize_t n, RFILE *p)
650 {
651 Py_ssize_t read = -1;
652
653 if (p->ptr != NULL) {
654 /* Fast path for loads() */
655 char *res = p->ptr;
656 Py_ssize_t left = p->end - p->ptr;
657 if (left < n) {
658 PyErr_SetString(PyExc_EOFError,
659 "marshal data too short");
660 return NULL;
661 }
662 p->ptr += n;
663 return res;
664 }
665 if (p->buf == NULL) {
666 p->buf = PyMem_MALLOC(n);
667 if (p->buf == NULL) {
668 PyErr_NoMemory();
669 return NULL;
670 }
671 p->buf_size = n;
672 }
673 else if (p->buf_size < n) {
674 char *tmp = PyMem_REALLOC(p->buf, n);
675 if (tmp == NULL) {
676 PyErr_NoMemory();
677 return NULL;
678 }
679 p->buf = tmp;
680 p->buf_size = n;
681 }
682
683 if (!p->readable) {
684 assert(p->fp != NULL);
685 read = fread(p->buf, 1, n, p->fp);
686 }
687 else {
688 _Py_IDENTIFIER(readinto);
689 PyObject *res, *mview;
690 Py_buffer buf;
691
692 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
693 return NULL;
694 mview = PyMemoryView_FromBuffer(&buf);
695 if (mview == NULL)
696 return NULL;
697
698 res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
699 if (res != NULL) {
700 read = PyNumber_AsSsize_t(res, PyExc_ValueError);
701 Py_DECREF(res);
702 }
703 }
704 if (read != n) {
705 if (!PyErr_Occurred()) {
706 if (read > n)
707 PyErr_Format(PyExc_ValueError,
708 "read() returned too much data: "
709 "%zd bytes requested, %zd returned",
710 n, read);
711 else
712 PyErr_SetString(PyExc_EOFError,
713 "EOF read where not expected");
714 }
715 return NULL;
716 }
717 return p->buf;
718 }
719
720 static int
r_byte(RFILE * p)721 r_byte(RFILE *p)
722 {
723 int c = EOF;
724
725 if (p->ptr != NULL) {
726 if (p->ptr < p->end)
727 c = (unsigned char) *p->ptr++;
728 return c;
729 }
730 if (!p->readable) {
731 assert(p->fp);
732 c = getc(p->fp);
733 }
734 else {
735 const char *ptr = r_string(1, p);
736 if (ptr != NULL)
737 c = *(const unsigned char *) ptr;
738 }
739 return c;
740 }
741
742 static int
r_short(RFILE * p)743 r_short(RFILE *p)
744 {
745 short x = -1;
746 const unsigned char *buffer;
747
748 buffer = (const unsigned char *) r_string(2, p);
749 if (buffer != NULL) {
750 x = buffer[0];
751 x |= buffer[1] << 8;
752 /* Sign-extension, in case short greater than 16 bits */
753 x |= -(x & 0x8000);
754 }
755 return x;
756 }
757
758 static long
r_long(RFILE * p)759 r_long(RFILE *p)
760 {
761 long x = -1;
762 const unsigned char *buffer;
763
764 buffer = (const unsigned char *) r_string(4, p);
765 if (buffer != NULL) {
766 x = buffer[0];
767 x |= (long)buffer[1] << 8;
768 x |= (long)buffer[2] << 16;
769 x |= (long)buffer[3] << 24;
770 #if SIZEOF_LONG > 4
771 /* Sign extension for 64-bit machines */
772 x |= -(x & 0x80000000L);
773 #endif
774 }
775 return x;
776 }
777
778 /* r_long64 deals with the TYPE_INT64 code. */
779 static PyObject *
r_long64(RFILE * p)780 r_long64(RFILE *p)
781 {
782 const unsigned char *buffer = (const unsigned char *) r_string(8, p);
783 if (buffer == NULL) {
784 return NULL;
785 }
786 return _PyLong_FromByteArray(buffer, 8,
787 1 /* little endian */,
788 1 /* signed */);
789 }
790
791 static PyObject *
r_PyLong(RFILE * p)792 r_PyLong(RFILE *p)
793 {
794 PyLongObject *ob;
795 long n, size, i;
796 int j, md, shorts_in_top_digit;
797 digit d;
798
799 n = r_long(p);
800 if (PyErr_Occurred())
801 return NULL;
802 if (n == 0)
803 return (PyObject *)_PyLong_New(0);
804 if (n < -SIZE32_MAX || n > SIZE32_MAX) {
805 PyErr_SetString(PyExc_ValueError,
806 "bad marshal data (long size out of range)");
807 return NULL;
808 }
809
810 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
811 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
812 ob = _PyLong_New(size);
813 if (ob == NULL)
814 return NULL;
815
816 Py_SIZE(ob) = n > 0 ? size : -size;
817
818 for (i = 0; i < size-1; i++) {
819 d = 0;
820 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
821 md = r_short(p);
822 if (PyErr_Occurred()) {
823 Py_DECREF(ob);
824 return NULL;
825 }
826 if (md < 0 || md > PyLong_MARSHAL_BASE)
827 goto bad_digit;
828 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
829 }
830 ob->ob_digit[i] = d;
831 }
832
833 d = 0;
834 for (j=0; j < shorts_in_top_digit; j++) {
835 md = r_short(p);
836 if (PyErr_Occurred()) {
837 Py_DECREF(ob);
838 return NULL;
839 }
840 if (md < 0 || md > PyLong_MARSHAL_BASE)
841 goto bad_digit;
842 /* topmost marshal digit should be nonzero */
843 if (md == 0 && j == shorts_in_top_digit - 1) {
844 Py_DECREF(ob);
845 PyErr_SetString(PyExc_ValueError,
846 "bad marshal data (unnormalized long data)");
847 return NULL;
848 }
849 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
850 }
851 if (PyErr_Occurred()) {
852 Py_DECREF(ob);
853 return NULL;
854 }
855 /* top digit should be nonzero, else the resulting PyLong won't be
856 normalized */
857 ob->ob_digit[size-1] = d;
858 return (PyObject *)ob;
859 bad_digit:
860 Py_DECREF(ob);
861 PyErr_SetString(PyExc_ValueError,
862 "bad marshal data (digit out of range in long)");
863 return NULL;
864 }
865
866 static double
r_float_bin(RFILE * p)867 r_float_bin(RFILE *p)
868 {
869 const unsigned char *buf = (const unsigned char *) r_string(8, p);
870 if (buf == NULL)
871 return -1;
872 return _PyFloat_Unpack8(buf, 1);
873 }
874
875 /* Issue #33720: Disable inlining for reducing the C stack consumption
876 on PGO builds. */
877 _Py_NO_INLINE static double
r_float_str(RFILE * p)878 r_float_str(RFILE *p)
879 {
880 int n;
881 char buf[256];
882 const char *ptr;
883 n = r_byte(p);
884 if (n == EOF) {
885 PyErr_SetString(PyExc_EOFError,
886 "EOF read where object expected");
887 return -1;
888 }
889 ptr = r_string(n, p);
890 if (ptr == NULL) {
891 return -1;
892 }
893 memcpy(buf, ptr, n);
894 buf[n] = '\0';
895 return PyOS_string_to_double(buf, NULL, NULL);
896 }
897
898 /* allocate the reflist index for a new object. Return -1 on failure */
899 static Py_ssize_t
r_ref_reserve(int flag,RFILE * p)900 r_ref_reserve(int flag, RFILE *p)
901 {
902 if (flag) { /* currently only FLAG_REF is defined */
903 Py_ssize_t idx = PyList_GET_SIZE(p->refs);
904 if (idx >= 0x7ffffffe) {
905 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
906 return -1;
907 }
908 if (PyList_Append(p->refs, Py_None) < 0)
909 return -1;
910 return idx;
911 } else
912 return 0;
913 }
914
915 /* insert the new object 'o' to the reflist at previously
916 * allocated index 'idx'.
917 * 'o' can be NULL, in which case nothing is done.
918 * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
919 * if 'o' was non-NULL, and the function fails, 'o' is released and
920 * NULL returned. This simplifies error checking at the call site since
921 * a single test for NULL for the function result is enough.
922 */
923 static PyObject *
r_ref_insert(PyObject * o,Py_ssize_t idx,int flag,RFILE * p)924 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
925 {
926 if (o != NULL && flag) { /* currently only FLAG_REF is defined */
927 PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
928 Py_INCREF(o);
929 PyList_SET_ITEM(p->refs, idx, o);
930 Py_DECREF(tmp);
931 }
932 return o;
933 }
934
935 /* combination of both above, used when an object can be
936 * created whenever it is seen in the file, as opposed to
937 * after having loaded its sub-objects.
938 */
939 static PyObject *
r_ref(PyObject * o,int flag,RFILE * p)940 r_ref(PyObject *o, int flag, RFILE *p)
941 {
942 assert(flag & FLAG_REF);
943 if (o == NULL)
944 return NULL;
945 if (PyList_Append(p->refs, o) < 0) {
946 Py_DECREF(o); /* release the new object */
947 return NULL;
948 }
949 return o;
950 }
951
952 static PyObject *
r_object(RFILE * p)953 r_object(RFILE *p)
954 {
955 /* NULL is a valid return value, it does not necessarily means that
956 an exception is set. */
957 PyObject *v, *v2;
958 Py_ssize_t idx = 0;
959 long i, n;
960 int type, code = r_byte(p);
961 int flag, is_interned = 0;
962 PyObject *retval = NULL;
963
964 if (code == EOF) {
965 PyErr_SetString(PyExc_EOFError,
966 "EOF read where object expected");
967 return NULL;
968 }
969
970 p->depth++;
971
972 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
973 p->depth--;
974 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
975 return NULL;
976 }
977
978 flag = code & FLAG_REF;
979 type = code & ~FLAG_REF;
980
981 #define R_REF(O) do{\
982 if (flag) \
983 O = r_ref(O, flag, p);\
984 } while (0)
985
986 switch (type) {
987
988 case TYPE_NULL:
989 break;
990
991 case TYPE_NONE:
992 Py_INCREF(Py_None);
993 retval = Py_None;
994 break;
995
996 case TYPE_STOPITER:
997 Py_INCREF(PyExc_StopIteration);
998 retval = PyExc_StopIteration;
999 break;
1000
1001 case TYPE_ELLIPSIS:
1002 Py_INCREF(Py_Ellipsis);
1003 retval = Py_Ellipsis;
1004 break;
1005
1006 case TYPE_FALSE:
1007 Py_INCREF(Py_False);
1008 retval = Py_False;
1009 break;
1010
1011 case TYPE_TRUE:
1012 Py_INCREF(Py_True);
1013 retval = Py_True;
1014 break;
1015
1016 case TYPE_INT:
1017 n = r_long(p);
1018 retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1019 R_REF(retval);
1020 break;
1021
1022 case TYPE_INT64:
1023 retval = r_long64(p);
1024 R_REF(retval);
1025 break;
1026
1027 case TYPE_LONG:
1028 retval = r_PyLong(p);
1029 R_REF(retval);
1030 break;
1031
1032 case TYPE_FLOAT:
1033 {
1034 double x = r_float_str(p);
1035 if (x == -1.0 && PyErr_Occurred())
1036 break;
1037 retval = PyFloat_FromDouble(x);
1038 R_REF(retval);
1039 break;
1040 }
1041
1042 case TYPE_BINARY_FLOAT:
1043 {
1044 double x = r_float_bin(p);
1045 if (x == -1.0 && PyErr_Occurred())
1046 break;
1047 retval = PyFloat_FromDouble(x);
1048 R_REF(retval);
1049 break;
1050 }
1051
1052 case TYPE_COMPLEX:
1053 {
1054 Py_complex c;
1055 c.real = r_float_str(p);
1056 if (c.real == -1.0 && PyErr_Occurred())
1057 break;
1058 c.imag = r_float_str(p);
1059 if (c.imag == -1.0 && PyErr_Occurred())
1060 break;
1061 retval = PyComplex_FromCComplex(c);
1062 R_REF(retval);
1063 break;
1064 }
1065
1066 case TYPE_BINARY_COMPLEX:
1067 {
1068 Py_complex c;
1069 c.real = r_float_bin(p);
1070 if (c.real == -1.0 && PyErr_Occurred())
1071 break;
1072 c.imag = r_float_bin(p);
1073 if (c.imag == -1.0 && PyErr_Occurred())
1074 break;
1075 retval = PyComplex_FromCComplex(c);
1076 R_REF(retval);
1077 break;
1078 }
1079
1080 case TYPE_STRING:
1081 {
1082 const char *ptr;
1083 n = r_long(p);
1084 if (PyErr_Occurred())
1085 break;
1086 if (n < 0 || n > SIZE32_MAX) {
1087 PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1088 break;
1089 }
1090 v = PyBytes_FromStringAndSize((char *)NULL, n);
1091 if (v == NULL)
1092 break;
1093 ptr = r_string(n, p);
1094 if (ptr == NULL) {
1095 Py_DECREF(v);
1096 break;
1097 }
1098 memcpy(PyBytes_AS_STRING(v), ptr, n);
1099 retval = v;
1100 R_REF(retval);
1101 break;
1102 }
1103
1104 case TYPE_ASCII_INTERNED:
1105 is_interned = 1;
1106 /* fall through */
1107 case TYPE_ASCII:
1108 n = r_long(p);
1109 if (PyErr_Occurred())
1110 break;
1111 if (n < 0 || n > SIZE32_MAX) {
1112 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1113 break;
1114 }
1115 goto _read_ascii;
1116
1117 case TYPE_SHORT_ASCII_INTERNED:
1118 is_interned = 1;
1119 /* fall through */
1120 case TYPE_SHORT_ASCII:
1121 n = r_byte(p);
1122 if (n == EOF) {
1123 PyErr_SetString(PyExc_EOFError,
1124 "EOF read where object expected");
1125 break;
1126 }
1127 _read_ascii:
1128 {
1129 const char *ptr;
1130 ptr = r_string(n, p);
1131 if (ptr == NULL)
1132 break;
1133 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
1134 if (v == NULL)
1135 break;
1136 if (is_interned)
1137 PyUnicode_InternInPlace(&v);
1138 retval = v;
1139 R_REF(retval);
1140 break;
1141 }
1142
1143 case TYPE_INTERNED:
1144 is_interned = 1;
1145 /* fall through */
1146 case TYPE_UNICODE:
1147 {
1148 const char *buffer;
1149
1150 n = r_long(p);
1151 if (PyErr_Occurred())
1152 break;
1153 if (n < 0 || n > SIZE32_MAX) {
1154 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1155 break;
1156 }
1157 if (n != 0) {
1158 buffer = r_string(n, p);
1159 if (buffer == NULL)
1160 break;
1161 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
1162 }
1163 else {
1164 v = PyUnicode_New(0, 0);
1165 }
1166 if (v == NULL)
1167 break;
1168 if (is_interned)
1169 PyUnicode_InternInPlace(&v);
1170 retval = v;
1171 R_REF(retval);
1172 break;
1173 }
1174
1175 case TYPE_SMALL_TUPLE:
1176 n = (unsigned char) r_byte(p);
1177 if (PyErr_Occurred())
1178 break;
1179 goto _read_tuple;
1180 case TYPE_TUPLE:
1181 n = r_long(p);
1182 if (PyErr_Occurred())
1183 break;
1184 if (n < 0 || n > SIZE32_MAX) {
1185 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1186 break;
1187 }
1188 _read_tuple:
1189 v = PyTuple_New(n);
1190 R_REF(v);
1191 if (v == NULL)
1192 break;
1193
1194 for (i = 0; i < n; i++) {
1195 v2 = r_object(p);
1196 if ( v2 == NULL ) {
1197 if (!PyErr_Occurred())
1198 PyErr_SetString(PyExc_TypeError,
1199 "NULL object in marshal data for tuple");
1200 Py_DECREF(v);
1201 v = NULL;
1202 break;
1203 }
1204 PyTuple_SET_ITEM(v, i, v2);
1205 }
1206 retval = v;
1207 break;
1208
1209 case TYPE_LIST:
1210 n = r_long(p);
1211 if (PyErr_Occurred())
1212 break;
1213 if (n < 0 || n > SIZE32_MAX) {
1214 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1215 break;
1216 }
1217 v = PyList_New(n);
1218 R_REF(v);
1219 if (v == NULL)
1220 break;
1221 for (i = 0; i < n; i++) {
1222 v2 = r_object(p);
1223 if ( v2 == NULL ) {
1224 if (!PyErr_Occurred())
1225 PyErr_SetString(PyExc_TypeError,
1226 "NULL object in marshal data for list");
1227 Py_DECREF(v);
1228 v = NULL;
1229 break;
1230 }
1231 PyList_SET_ITEM(v, i, v2);
1232 }
1233 retval = v;
1234 break;
1235
1236 case TYPE_DICT:
1237 v = PyDict_New();
1238 R_REF(v);
1239 if (v == NULL)
1240 break;
1241 for (;;) {
1242 PyObject *key, *val;
1243 key = r_object(p);
1244 if (key == NULL)
1245 break;
1246 val = r_object(p);
1247 if (val == NULL) {
1248 Py_DECREF(key);
1249 break;
1250 }
1251 if (PyDict_SetItem(v, key, val) < 0) {
1252 Py_DECREF(key);
1253 Py_DECREF(val);
1254 break;
1255 }
1256 Py_DECREF(key);
1257 Py_DECREF(val);
1258 }
1259 if (PyErr_Occurred()) {
1260 Py_DECREF(v);
1261 v = NULL;
1262 }
1263 retval = v;
1264 break;
1265
1266 case TYPE_SET:
1267 case TYPE_FROZENSET:
1268 n = r_long(p);
1269 if (PyErr_Occurred())
1270 break;
1271 if (n < 0 || n > SIZE32_MAX) {
1272 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1273 break;
1274 }
1275
1276 if (n == 0 && type == TYPE_FROZENSET) {
1277 /* call frozenset() to get the empty frozenset singleton */
1278 v = _PyObject_CallNoArg((PyObject*)&PyFrozenSet_Type);
1279 if (v == NULL)
1280 break;
1281 R_REF(v);
1282 retval = v;
1283 }
1284 else {
1285 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
1286 if (type == TYPE_SET) {
1287 R_REF(v);
1288 } else {
1289 /* must use delayed registration of frozensets because they must
1290 * be init with a refcount of 1
1291 */
1292 idx = r_ref_reserve(flag, p);
1293 if (idx < 0)
1294 Py_CLEAR(v); /* signal error */
1295 }
1296 if (v == NULL)
1297 break;
1298
1299 for (i = 0; i < n; i++) {
1300 v2 = r_object(p);
1301 if ( v2 == NULL ) {
1302 if (!PyErr_Occurred())
1303 PyErr_SetString(PyExc_TypeError,
1304 "NULL object in marshal data for set");
1305 Py_DECREF(v);
1306 v = NULL;
1307 break;
1308 }
1309 if (PySet_Add(v, v2) == -1) {
1310 Py_DECREF(v);
1311 Py_DECREF(v2);
1312 v = NULL;
1313 break;
1314 }
1315 Py_DECREF(v2);
1316 }
1317 if (type != TYPE_SET)
1318 v = r_ref_insert(v, idx, flag, p);
1319 retval = v;
1320 }
1321 break;
1322
1323 case TYPE_CODE:
1324 {
1325 int argcount;
1326 int posonlyargcount;
1327 int kwonlyargcount;
1328 int nlocals;
1329 int stacksize;
1330 int flags;
1331 PyObject *code = NULL;
1332 PyObject *consts = NULL;
1333 PyObject *names = NULL;
1334 PyObject *varnames = NULL;
1335 PyObject *freevars = NULL;
1336 PyObject *cellvars = NULL;
1337 PyObject *filename = NULL;
1338 PyObject *name = NULL;
1339 int firstlineno;
1340 PyObject *lnotab = NULL;
1341
1342 idx = r_ref_reserve(flag, p);
1343 if (idx < 0)
1344 break;
1345
1346 v = NULL;
1347
1348 /* XXX ignore long->int overflows for now */
1349 argcount = (int)r_long(p);
1350 if (PyErr_Occurred())
1351 goto code_error;
1352 posonlyargcount = (int)r_long(p);
1353 if (PyErr_Occurred()) {
1354 goto code_error;
1355 }
1356 kwonlyargcount = (int)r_long(p);
1357 if (PyErr_Occurred())
1358 goto code_error;
1359 nlocals = (int)r_long(p);
1360 if (PyErr_Occurred())
1361 goto code_error;
1362 stacksize = (int)r_long(p);
1363 if (PyErr_Occurred())
1364 goto code_error;
1365 flags = (int)r_long(p);
1366 if (PyErr_Occurred())
1367 goto code_error;
1368 code = r_object(p);
1369 if (code == NULL)
1370 goto code_error;
1371 consts = r_object(p);
1372 if (consts == NULL)
1373 goto code_error;
1374 names = r_object(p);
1375 if (names == NULL)
1376 goto code_error;
1377 varnames = r_object(p);
1378 if (varnames == NULL)
1379 goto code_error;
1380 freevars = r_object(p);
1381 if (freevars == NULL)
1382 goto code_error;
1383 cellvars = r_object(p);
1384 if (cellvars == NULL)
1385 goto code_error;
1386 filename = r_object(p);
1387 if (filename == NULL)
1388 goto code_error;
1389 name = r_object(p);
1390 if (name == NULL)
1391 goto code_error;
1392 firstlineno = (int)r_long(p);
1393 if (firstlineno == -1 && PyErr_Occurred())
1394 break;
1395 lnotab = r_object(p);
1396 if (lnotab == NULL)
1397 goto code_error;
1398
1399 if (PySys_Audit("code.__new__", "OOOiiiiii",
1400 code, filename, name, argcount, posonlyargcount,
1401 kwonlyargcount, nlocals, stacksize, flags) < 0) {
1402 goto code_error;
1403 }
1404
1405 v = (PyObject *) PyCode_NewWithPosOnlyArgs(
1406 argcount, posonlyargcount, kwonlyargcount,
1407 nlocals, stacksize, flags,
1408 code, consts, names, varnames,
1409 freevars, cellvars, filename, name,
1410 firstlineno, lnotab);
1411 v = r_ref_insert(v, idx, flag, p);
1412
1413 code_error:
1414 Py_XDECREF(code);
1415 Py_XDECREF(consts);
1416 Py_XDECREF(names);
1417 Py_XDECREF(varnames);
1418 Py_XDECREF(freevars);
1419 Py_XDECREF(cellvars);
1420 Py_XDECREF(filename);
1421 Py_XDECREF(name);
1422 Py_XDECREF(lnotab);
1423 }
1424 retval = v;
1425 break;
1426
1427 case TYPE_REF:
1428 n = r_long(p);
1429 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1430 if (n == -1 && PyErr_Occurred())
1431 break;
1432 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1433 break;
1434 }
1435 v = PyList_GET_ITEM(p->refs, n);
1436 if (v == Py_None) {
1437 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1438 break;
1439 }
1440 Py_INCREF(v);
1441 retval = v;
1442 break;
1443
1444 default:
1445 /* Bogus data got written, which isn't ideal.
1446 This will let you keep working and recover. */
1447 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1448 break;
1449
1450 }
1451 p->depth--;
1452 return retval;
1453 }
1454
1455 static PyObject *
read_object(RFILE * p)1456 read_object(RFILE *p)
1457 {
1458 PyObject *v;
1459 if (PyErr_Occurred()) {
1460 fprintf(stderr, "XXX readobject called with exception set\n");
1461 return NULL;
1462 }
1463 v = r_object(p);
1464 if (v == NULL && !PyErr_Occurred())
1465 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1466 return v;
1467 }
1468
1469 int
PyMarshal_ReadShortFromFile(FILE * fp)1470 PyMarshal_ReadShortFromFile(FILE *fp)
1471 {
1472 RFILE rf;
1473 int res;
1474 assert(fp);
1475 rf.readable = NULL;
1476 rf.fp = fp;
1477 rf.end = rf.ptr = NULL;
1478 rf.buf = NULL;
1479 res = r_short(&rf);
1480 if (rf.buf != NULL)
1481 PyMem_FREE(rf.buf);
1482 return res;
1483 }
1484
1485 long
PyMarshal_ReadLongFromFile(FILE * fp)1486 PyMarshal_ReadLongFromFile(FILE *fp)
1487 {
1488 RFILE rf;
1489 long res;
1490 rf.fp = fp;
1491 rf.readable = NULL;
1492 rf.ptr = rf.end = NULL;
1493 rf.buf = NULL;
1494 res = r_long(&rf);
1495 if (rf.buf != NULL)
1496 PyMem_FREE(rf.buf);
1497 return res;
1498 }
1499
1500 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
1501 static off_t
getfilesize(FILE * fp)1502 getfilesize(FILE *fp)
1503 {
1504 struct _Py_stat_struct st;
1505 if (_Py_fstat_noraise(fileno(fp), &st) != 0)
1506 return -1;
1507 #if SIZEOF_OFF_T == 4
1508 else if (st.st_size >= INT_MAX)
1509 return (off_t)INT_MAX;
1510 #endif
1511 else
1512 return (off_t)st.st_size;
1513 }
1514
1515 /* If we can get the size of the file up-front, and it's reasonably small,
1516 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1517 * than reading a byte at a time from file; speeds .pyc imports.
1518 * CAUTION: since this may read the entire remainder of the file, don't
1519 * call it unless you know you're done with the file.
1520 */
1521 PyObject *
PyMarshal_ReadLastObjectFromFile(FILE * fp)1522 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1523 {
1524 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
1525 #define REASONABLE_FILE_LIMIT (1L << 18)
1526 off_t filesize;
1527 filesize = getfilesize(fp);
1528 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
1529 char* pBuf = (char *)PyMem_MALLOC(filesize);
1530 if (pBuf != NULL) {
1531 size_t n = fread(pBuf, 1, (size_t)filesize, fp);
1532 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
1533 PyMem_FREE(pBuf);
1534 return v;
1535 }
1536
1537 }
1538 /* We don't have fstat, or we do but the file is larger than
1539 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1540 */
1541 return PyMarshal_ReadObjectFromFile(fp);
1542
1543 #undef REASONABLE_FILE_LIMIT
1544 }
1545
1546 PyObject *
PyMarshal_ReadObjectFromFile(FILE * fp)1547 PyMarshal_ReadObjectFromFile(FILE *fp)
1548 {
1549 RFILE rf;
1550 PyObject *result;
1551 rf.fp = fp;
1552 rf.readable = NULL;
1553 rf.depth = 0;
1554 rf.ptr = rf.end = NULL;
1555 rf.buf = NULL;
1556 rf.refs = PyList_New(0);
1557 if (rf.refs == NULL)
1558 return NULL;
1559 result = r_object(&rf);
1560 Py_DECREF(rf.refs);
1561 if (rf.buf != NULL)
1562 PyMem_FREE(rf.buf);
1563 return result;
1564 }
1565
1566 PyObject *
PyMarshal_ReadObjectFromString(const char * str,Py_ssize_t len)1567 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
1568 {
1569 RFILE rf;
1570 PyObject *result;
1571 rf.fp = NULL;
1572 rf.readable = NULL;
1573 rf.ptr = (char *)str;
1574 rf.end = (char *)str + len;
1575 rf.buf = NULL;
1576 rf.depth = 0;
1577 rf.refs = PyList_New(0);
1578 if (rf.refs == NULL)
1579 return NULL;
1580 result = r_object(&rf);
1581 Py_DECREF(rf.refs);
1582 if (rf.buf != NULL)
1583 PyMem_FREE(rf.buf);
1584 return result;
1585 }
1586
1587 PyObject *
PyMarshal_WriteObjectToString(PyObject * x,int version)1588 PyMarshal_WriteObjectToString(PyObject *x, int version)
1589 {
1590 WFILE wf;
1591
1592 memset(&wf, 0, sizeof(wf));
1593 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1594 if (wf.str == NULL)
1595 return NULL;
1596 wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1597 wf.end = wf.ptr + PyBytes_Size(wf.str);
1598 wf.error = WFERR_OK;
1599 wf.version = version;
1600 if (w_init_refs(&wf, version)) {
1601 Py_DECREF(wf.str);
1602 return NULL;
1603 }
1604 w_object(x, &wf);
1605 w_clear_refs(&wf);
1606 if (wf.str != NULL) {
1607 char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1608 if (wf.ptr - base > PY_SSIZE_T_MAX) {
1609 Py_DECREF(wf.str);
1610 PyErr_SetString(PyExc_OverflowError,
1611 "too much marshal data for a bytes object");
1612 return NULL;
1613 }
1614 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1615 return NULL;
1616 }
1617 if (wf.error != WFERR_OK) {
1618 Py_XDECREF(wf.str);
1619 if (wf.error == WFERR_NOMEMORY)
1620 PyErr_NoMemory();
1621 else
1622 PyErr_SetString(PyExc_ValueError,
1623 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1624 :"object too deeply nested to marshal");
1625 return NULL;
1626 }
1627 return wf.str;
1628 }
1629
1630 /* And an interface for Python programs... */
1631 /*[clinic input]
1632 marshal.dump
1633
1634 value: object
1635 Must be a supported type.
1636 file: object
1637 Must be a writeable binary file.
1638 version: int(c_default="Py_MARSHAL_VERSION") = version
1639 Indicates the data format that dump should use.
1640 /
1641
1642 Write the value on the open file.
1643
1644 If the value has (or contains an object that has) an unsupported type, a
1645 ValueError exception is raised - but garbage data will also be written
1646 to the file. The object will not be properly read back by load().
1647 [clinic start generated code]*/
1648
1649 static PyObject *
marshal_dump_impl(PyObject * module,PyObject * value,PyObject * file,int version)1650 marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
1651 int version)
1652 /*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
1653 {
1654 /* XXX Quick hack -- need to do this differently */
1655 PyObject *s;
1656 PyObject *res;
1657 _Py_IDENTIFIER(write);
1658
1659 s = PyMarshal_WriteObjectToString(value, version);
1660 if (s == NULL)
1661 return NULL;
1662 res = _PyObject_CallMethodIdObjArgs(file, &PyId_write, s, NULL);
1663 Py_DECREF(s);
1664 return res;
1665 }
1666
1667 /*[clinic input]
1668 marshal.load
1669
1670 file: object
1671 Must be readable binary file.
1672 /
1673
1674 Read one value from the open file and return it.
1675
1676 If no valid value is read (e.g. because the data has a different Python
1677 version's incompatible marshal format), raise EOFError, ValueError or
1678 TypeError.
1679
1680 Note: If an object containing an unsupported type was marshalled with
1681 dump(), load() will substitute None for the unmarshallable type.
1682 [clinic start generated code]*/
1683
1684 static PyObject *
marshal_load(PyObject * module,PyObject * file)1685 marshal_load(PyObject *module, PyObject *file)
1686 /*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
1687 {
1688 PyObject *data, *result;
1689 _Py_IDENTIFIER(read);
1690 RFILE rf;
1691
1692 /*
1693 * Make a call to the read method, but read zero bytes.
1694 * This is to ensure that the object passed in at least
1695 * has a read method which returns bytes.
1696 * This can be removed if we guarantee good error handling
1697 * for r_string()
1698 */
1699 data = _PyObject_CallMethodId(file, &PyId_read, "i", 0);
1700 if (data == NULL)
1701 return NULL;
1702 if (!PyBytes_Check(data)) {
1703 PyErr_Format(PyExc_TypeError,
1704 "file.read() returned not bytes but %.100s",
1705 data->ob_type->tp_name);
1706 result = NULL;
1707 }
1708 else {
1709 rf.depth = 0;
1710 rf.fp = NULL;
1711 rf.readable = file;
1712 rf.ptr = rf.end = NULL;
1713 rf.buf = NULL;
1714 if ((rf.refs = PyList_New(0)) != NULL) {
1715 result = read_object(&rf);
1716 Py_DECREF(rf.refs);
1717 if (rf.buf != NULL)
1718 PyMem_FREE(rf.buf);
1719 } else
1720 result = NULL;
1721 }
1722 Py_DECREF(data);
1723 return result;
1724 }
1725
1726 /*[clinic input]
1727 marshal.dumps
1728
1729 value: object
1730 Must be a supported type.
1731 version: int(c_default="Py_MARSHAL_VERSION") = version
1732 Indicates the data format that dumps should use.
1733 /
1734
1735 Return the bytes object that would be written to a file by dump(value, file).
1736
1737 Raise a ValueError exception if value has (or contains an object that has) an
1738 unsupported type.
1739 [clinic start generated code]*/
1740
1741 static PyObject *
marshal_dumps_impl(PyObject * module,PyObject * value,int version)1742 marshal_dumps_impl(PyObject *module, PyObject *value, int version)
1743 /*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
1744 {
1745 return PyMarshal_WriteObjectToString(value, version);
1746 }
1747
1748 /*[clinic input]
1749 marshal.loads
1750
1751 bytes: Py_buffer
1752 /
1753
1754 Convert the bytes-like object to a value.
1755
1756 If no valid value is found, raise EOFError, ValueError or TypeError. Extra
1757 bytes in the input are ignored.
1758 [clinic start generated code]*/
1759
1760 static PyObject *
marshal_loads_impl(PyObject * module,Py_buffer * bytes)1761 marshal_loads_impl(PyObject *module, Py_buffer *bytes)
1762 /*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
1763 {
1764 RFILE rf;
1765 char *s = bytes->buf;
1766 Py_ssize_t n = bytes->len;
1767 PyObject* result;
1768 rf.fp = NULL;
1769 rf.readable = NULL;
1770 rf.ptr = s;
1771 rf.end = s + n;
1772 rf.depth = 0;
1773 if ((rf.refs = PyList_New(0)) == NULL)
1774 return NULL;
1775 result = read_object(&rf);
1776 Py_DECREF(rf.refs);
1777 return result;
1778 }
1779
1780 static PyMethodDef marshal_methods[] = {
1781 MARSHAL_DUMP_METHODDEF
1782 MARSHAL_LOAD_METHODDEF
1783 MARSHAL_DUMPS_METHODDEF
1784 MARSHAL_LOADS_METHODDEF
1785 {NULL, NULL} /* sentinel */
1786 };
1787
1788
1789 PyDoc_STRVAR(module_doc,
1790 "This module contains functions that can read and write Python values in\n\
1791 a binary format. The format is specific to Python, but independent of\n\
1792 machine architecture issues.\n\
1793 \n\
1794 Not all Python object types are supported; in general, only objects\n\
1795 whose value is independent from a particular invocation of Python can be\n\
1796 written and read by this module. The following types are supported:\n\
1797 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1798 tuples, lists, sets, dictionaries, and code objects, where it\n\
1799 should be understood that tuples, lists and dictionaries are only\n\
1800 supported as long as the values contained therein are themselves\n\
1801 supported; and recursive lists and dictionaries should not be written\n\
1802 (they will cause infinite loops).\n\
1803 \n\
1804 Variables:\n\
1805 \n\
1806 version -- indicates the format that the module uses. Version 0 is the\n\
1807 historical format, version 1 shares interned strings and version 2\n\
1808 uses a binary format for floating point numbers.\n\
1809 Version 3 shares common object references (New in version 3.4).\n\
1810 \n\
1811 Functions:\n\
1812 \n\
1813 dump() -- write value to a file\n\
1814 load() -- read value from a file\n\
1815 dumps() -- marshal value as a bytes object\n\
1816 loads() -- read value from a bytes-like object");
1817
1818
1819
1820 static struct PyModuleDef marshalmodule = {
1821 PyModuleDef_HEAD_INIT,
1822 "marshal",
1823 module_doc,
1824 0,
1825 marshal_methods,
1826 NULL,
1827 NULL,
1828 NULL,
1829 NULL
1830 };
1831
1832 PyMODINIT_FUNC
PyMarshal_Init(void)1833 PyMarshal_Init(void)
1834 {
1835 PyObject *mod = PyModule_Create(&marshalmodule);
1836 if (mod == NULL)
1837 return NULL;
1838 if (PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION) < 0) {
1839 Py_DECREF(mod);
1840 return NULL;
1841 }
1842 return mod;
1843 }
1844