1 /* -*-  c-basic-offset: 4 -*-
2  *
3  *  Fast store and retrieve mechanism.
4  *
5  *  Copyright (c) 1995-2000, Raphael Manfredi
6  *  Copyright (c) 2016, 2017 cPanel Inc
7  *  Copyright (c) 2017 Reini Urban
8  *
9  *  You may redistribute only under the same terms as Perl 5, as specified
10  *  in the README file that comes with the distribution.
11  *
12  */
13 
14 #define PERL_NO_GET_CONTEXT     /* we want efficiency */
15 #include <EXTERN.h>
16 #include <perl.h>
17 #include <XSUB.h>
18 
19 #ifndef PERL_VERSION_LT
20 # if !defined(PERL_VERSION) || !defined(PERL_REVISION) || ( PERL_REVISION == 5 && ( PERL_VERSION < 10 || (PERL_VERSION == 10 && PERL_SUBVERSION < 1) ) )
21 #   define NEED_PL_parser
22 #   define NEED_sv_2pv_flags
23 #   define NEED_load_module
24 #   define NEED_vload_module
25 #   define NEED_newCONSTSUB
26 #   define NEED_newSVpvn_flags
27 #   define NEED_newRV_noinc
28 # endif
29 #include "ppport.h"             /* handle old perls */
30 #endif
31 
32 #ifdef DEBUGGING
33 #define DEBUGME /* Debug mode, turns assertions on as well */
34 #define DASSERT /* Assertion mode */
35 #endif
36 
37 /*
38  * Pre PerlIO time when none of USE_PERLIO and PERLIO_IS_STDIO is defined
39  * Provide them with the necessary defines so they can build with pre-5.004.
40  */
41 #ifndef USE_PERLIO
42 #ifndef PERLIO_IS_STDIO
43 #define PerlIO FILE
44 #define PerlIO_getc(x) getc(x)
45 #define PerlIO_putc(f,x) putc(x,f)
46 #define PerlIO_read(x,y,z) fread(y,1,z,x)
47 #define PerlIO_write(x,y,z) fwrite(y,1,z,x)
48 #define PerlIO_stdoutf printf
49 #endif	/* PERLIO_IS_STDIO */
50 #endif	/* USE_PERLIO */
51 
52 /*
53  * Earlier versions of perl might be used, we can't assume they have the latest!
54  */
55 
56 #ifndef HvSHAREKEYS_off
57 #define HvSHAREKEYS_off(hv)	/* Ignore */
58 #endif
59 
60 /* perl <= 5.8.2 needs this */
61 #ifndef SvIsCOW
62 # define SvIsCOW(sv) 0
63 #endif
64 
65 #ifndef HvRITER_set
66 #  define HvRITER_set(hv,r)	(HvRITER(hv) = r)
67 #endif
68 #ifndef HvEITER_set
69 #  define HvEITER_set(hv,r)	(HvEITER(hv) = r)
70 #endif
71 
72 #ifndef HvRITER_get
73 #  define HvRITER_get           HvRITER
74 #endif
75 #ifndef HvEITER_get
76 #  define HvEITER_get           HvEITER
77 #endif
78 
79 #ifndef HvPLACEHOLDERS_get
80 #  define HvPLACEHOLDERS_get    HvPLACEHOLDERS
81 #endif
82 
83 #ifndef HvTOTALKEYS
84 #  define HvTOTALKEYS(hv)	HvKEYS(hv)
85 #endif
86 /* 5.6 */
87 #ifndef HvUSEDKEYS
88 #  define HvUSEDKEYS(hv)	HvKEYS(hv)
89 #endif
90 
91 #ifdef SVf_IsCOW
92 #  define SvTRULYREADONLY(sv)	SvREADONLY(sv)
93 #else
94 #  define SvTRULYREADONLY(sv)	(SvREADONLY(sv) && !SvIsCOW(sv))
95 #endif
96 
97 #ifndef SvPVCLEAR
98 #  define SvPVCLEAR(sv) sv_setpvs(sv, "")
99 #endif
100 
101 #ifndef strEQc
102 #  define strEQc(s,c) memEQ(s, ("" c ""), sizeof(c))
103 #endif
104 
105 #if defined(HAS_FLOCK) || defined(FCNTL_CAN_LOCK) && defined(HAS_LOCKF)
106 #define CAN_FLOCK &PL_sv_yes
107 #else
108 #define CAN_FLOCK &PL_sv_no
109 #endif
110 
111 #ifdef DEBUGME
112 
113 #ifndef DASSERT
114 #define DASSERT
115 #endif
116 
117 /*
118  * TRACEME() will only output things when the $Storable::DEBUGME is true,
119  * using the value traceme cached in the context.
120  *
121  *
122  * TRACEMED() directly looks at the variable, for use before traceme has been
123  * updated.
124  */
125 
126 #define TRACEME(x)                                            \
127     STMT_START {					      \
128         if (cxt->traceme)				      \
129             { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); }       \
130     } STMT_END
131 
132 #define TRACEMED(x)                                           \
133     STMT_START {                                              \
134         if (SvTRUE(get_sv("Storable::DEBUGME", GV_ADD)))      \
135             { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); }       \
136     } STMT_END
137 
138 #define INIT_TRACEME							\
139     STMT_START {							\
140 	cxt->traceme = SvTRUE(get_sv("Storable::DEBUGME", GV_ADD));	\
141     } STMT_END
142 
143 #else
144 #define TRACEME(x)
145 #define TRACEMED(x)
146 #define INIT_TRACEME
147 #endif	/* DEBUGME */
148 
149 #ifdef DASSERT
150 #define ASSERT(x,y)                                              \
151     STMT_START {                                                 \
152         if (!(x)) {                                              \
153             PerlIO_stdoutf("ASSERT FAILED (\"%s\", line %d): ",  \
154                            __FILE__, (int)__LINE__);             \
155             PerlIO_stdoutf y; PerlIO_stdoutf("\n");              \
156         }                                                        \
157     } STMT_END
158 #else
159 #define ASSERT(x,y)
160 #endif
161 
162 /*
163  * Type markers.
164  */
165 
166 #define C(x) ((char) (x))	/* For markers with dynamic retrieval handling */
167 
168 #define SX_OBJECT	C(0)	/* Already stored object */
169 #define SX_LSCALAR	C(1)	/* Scalar (large binary) follows (length, data) */
170 #define SX_ARRAY	C(2)	/* Array forthcoming (size, item list) */
171 #define SX_HASH		C(3)	/* Hash forthcoming (size, key/value pair list) */
172 #define SX_REF		C(4)	/* Reference to object forthcoming */
173 #define SX_UNDEF	C(5)	/* Undefined scalar */
174 #define SX_INTEGER	C(6)	/* Integer forthcoming */
175 #define SX_DOUBLE	C(7)	/* Double forthcoming */
176 #define SX_BYTE		C(8)	/* (signed) byte forthcoming */
177 #define SX_NETINT	C(9)	/* Integer in network order forthcoming */
178 #define SX_SCALAR	C(10)	/* Scalar (binary, small) follows (length, data) */
179 #define SX_TIED_ARRAY	C(11)	/* Tied array forthcoming */
180 #define SX_TIED_HASH	C(12)	/* Tied hash forthcoming */
181 #define SX_TIED_SCALAR	C(13)	/* Tied scalar forthcoming */
182 #define SX_SV_UNDEF	C(14)	/* Perl's immortal PL_sv_undef */
183 #define SX_SV_YES	C(15)	/* Perl's immortal PL_sv_yes */
184 #define SX_SV_NO	C(16)	/* Perl's immortal PL_sv_no */
185 #define SX_BLESS	C(17)	/* Object is blessed */
186 #define SX_IX_BLESS	C(18)	/* Object is blessed, classname given by index */
187 #define SX_HOOK		C(19)	/* Stored via hook, user-defined */
188 #define SX_OVERLOAD	C(20)	/* Overloaded reference */
189 #define SX_TIED_KEY	C(21)	/* Tied magic key forthcoming */
190 #define SX_TIED_IDX	C(22)	/* Tied magic index forthcoming */
191 #define SX_UTF8STR	C(23)	/* UTF-8 string forthcoming (small) */
192 #define SX_LUTF8STR	C(24)	/* UTF-8 string forthcoming (large) */
193 #define SX_FLAG_HASH	C(25)	/* Hash with flags forthcoming (size, flags, key/flags/value triplet list) */
194 #define SX_CODE         C(26)   /* Code references as perl source code */
195 #define SX_WEAKREF	C(27)	/* Weak reference to object forthcoming */
196 #define SX_WEAKOVERLOAD	C(28)	/* Overloaded weak reference */
197 #define SX_VSTRING	C(29)	/* vstring forthcoming (small) */
198 #define SX_LVSTRING	C(30)	/* vstring forthcoming (large) */
199 #define SX_SVUNDEF_ELEM	C(31)	/* array element set to &PL_sv_undef */
200 #define SX_REGEXP	C(32)	/* Regexp */
201 #define SX_LOBJECT	C(33)	/* Large object: string, array or hash (size >2G) */
202 #define SX_LAST		C(34)	/* invalid. marker only */
203 
204 /*
205  * Those are only used to retrieve "old" pre-0.6 binary images.
206  */
207 #define SX_ITEM		'i'	/* An array item introducer */
208 #define SX_IT_UNDEF	'I'	/* Undefined array item */
209 #define SX_KEY		'k'	/* A hash key introducer */
210 #define SX_VALUE	'v'	/* A hash value introducer */
211 #define SX_VL_UNDEF	'V'	/* Undefined hash value */
212 
213 /*
214  * Those are only used to retrieve "old" pre-0.7 binary images
215  */
216 
217 #define SX_CLASS	'b'	/* Object is blessed, class name length <255 */
218 #define SX_LG_CLASS	'B'	/* Object is blessed, class name length >255 */
219 #define SX_STORED	'X'	/* End of object */
220 
221 /*
222  * Limits between short/long length representation.
223  */
224 
225 #define LG_SCALAR	255	/* Large scalar length limit */
226 #define LG_BLESS	127	/* Large classname bless limit */
227 
228 /*
229  * Operation types
230  */
231 
232 #define ST_STORE	0x1	/* Store operation */
233 #define ST_RETRIEVE	0x2	/* Retrieval operation */
234 #define ST_CLONE	0x4	/* Deep cloning operation */
235 
236 /*
237  * The following structure is used for hash table key retrieval. Since, when
238  * retrieving objects, we'll be facing blessed hash references, it's best
239  * to pre-allocate that buffer once and resize it as the need arises, never
240  * freeing it (keys will be saved away someplace else anyway, so even large
241  * keys are not enough a motivation to reclaim that space).
242  *
243  * This structure is also used for memory store/retrieve operations which
244  * happen in a fixed place before being malloc'ed elsewhere if persistence
245  * is required. Hence the aptr pointer.
246  */
247 struct extendable {
248     char *arena;	/* Will hold hash key strings, resized as needed */
249     STRLEN asiz;	/* Size of aforementioned buffer */
250     char *aptr;		/* Arena pointer, for in-place read/write ops */
251     char *aend;		/* First invalid address */
252 };
253 
254 /*
255  * At store time:
256  * A hash table records the objects which have already been stored.
257  * Those are referred to as SX_OBJECT in the file, and their "tag" (i.e.
258  * an arbitrary sequence number) is used to identify them.
259  *
260  * At retrieve time:
261  * An array table records the objects which have already been retrieved,
262  * as seen by the tag determined by counting the objects themselves. The
263  * reference to that retrieved object is kept in the table, and is returned
264  * when an SX_OBJECT is found bearing that same tag.
265  *
266  * The same processing is used to record "classname" for blessed objects:
267  * indexing by a hash at store time, and via an array at retrieve time.
268  */
269 
270 typedef unsigned long stag_t;	/* Used by pre-0.6 binary format */
271 
272 /*
273  * Make the tag type 64-bit on 64-bit platforms.
274  *
275  * If the tag number is low enough it's stored as a 32-bit value, but
276  * with very large arrays and hashes it's possible to go over 2**32
277  * scalars.
278  */
279 
280 typedef STRLEN ntag_t;
281 
282 /* used for where_is_undef - marks an unset value */
283 #define UNSET_NTAG_T (~(ntag_t)0)
284 
285 /*
286  * The following "thread-safe" related defines were contributed by
287  * Murray Nesbitt <murray@activestate.com> and integrated by RAM, who
288  * only renamed things a little bit to ensure consistency with surrounding
289  * code.	-- RAM, 14/09/1999
290  *
291  * The original patch suffered from the fact that the stcxt_t structure
292  * was global.  Murray tried to minimize the impact on the code as much as
293  * possible.
294  *
295  * Starting with 0.7, Storable can be re-entrant, via the STORABLE_xxx hooks
296  * on objects.  Therefore, the notion of context needs to be generalized,
297  * threading or not.
298  */
299 
300 #define MY_VERSION "Storable(" XS_VERSION ")"
301 
302 
303 /*
304  * Conditional UTF8 support.
305  *
306  */
307 #ifdef SvUTF8_on
308 #define STORE_UTF8STR(pv, len)	STORE_PV_LEN(pv, len, SX_UTF8STR, SX_LUTF8STR)
309 #define HAS_UTF8_SCALARS
310 #ifdef HeKUTF8
311 #define HAS_UTF8_HASHES
312 #define HAS_UTF8_ALL
313 #else
314 /* 5.6 perl has utf8 scalars but not hashes */
315 #endif
316 #else
317 #define SvUTF8(sv) 0
318 #define STORE_UTF8STR(pv, len) CROAK(("panic: storing UTF8 in non-UTF8 perl"))
319 #endif
320 #ifndef HAS_UTF8_ALL
321 #define UTF8_CROAK() CROAK(("Cannot retrieve UTF8 data in non-UTF8 perl"))
322 #endif
323 #ifndef SvWEAKREF
324 #define WEAKREF_CROAK() CROAK(("Cannot retrieve weak references in this perl"))
325 #endif
326 #ifndef SvVOK
327 #define VSTRING_CROAK() CROAK(("Cannot retrieve vstring in this perl"))
328 #endif
329 
330 #ifdef HvPLACEHOLDERS
331 #define HAS_RESTRICTED_HASHES
332 #else
333 #define HVhek_PLACEHOLD	0x200
334 #define RESTRICTED_HASH_CROAK() CROAK(("Cannot retrieve restricted hash"))
335 #endif
336 
337 #ifdef HvHASKFLAGS
338 #define HAS_HASH_KEY_FLAGS
339 #endif
340 
341 #ifdef ptr_table_new
342 #define USE_PTR_TABLE
343 #endif
344 
345 /* do we need/want to clear padding on NVs? */
346 #if defined(LONG_DOUBLEKIND) && defined(USE_LONG_DOUBLE)
347 #  if LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN || \
348       LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN
349 #    define NV_PADDING (NVSIZE - 10)
350 #  else
351 #    define NV_PADDING 0
352 #  endif
353 #else
354 /* This is kind of a guess - it means we'll get an unneeded clear on 128-bit NV
355    but an upgraded perl will fix that
356 */
357 #  if NVSIZE > 8
358 #    define NV_CLEAR
359 #  endif
360 #  define NV_PADDING 0
361 #endif
362 
363 typedef union {
364     NV nv;
365     U8 bytes[sizeof(NV)];
366 } NV_bytes;
367 
368 /* Needed for 32bit with lengths > 2G - 4G, and 64bit */
369 #if PTRSIZE > 4
370 #define HAS_U64
371 #endif
372 
373 /*
374  * Fields s_tainted and s_dirty are prefixed with s_ because Perl's include
375  * files remap tainted and dirty when threading is enabled.  That's bad for
376  * perl to remap such common words.	-- RAM, 29/09/00
377  */
378 
379 struct stcxt;
380 typedef struct stcxt {
381     int entry;		/* flags recursion */
382     int optype;		/* type of traversal operation */
383     /* which objects have been seen, store time.
384        tags are numbers, which are cast to (SV *) and stored directly */
385 #ifdef USE_PTR_TABLE
386     /* use pseen if we have ptr_tables. We have to store tag+1, because
387        tag numbers start at 0, and we can't store (SV *) 0 in a ptr_table
388        without it being confused for a fetch lookup failure.  */
389     struct ptr_tbl *pseen;
390     /* Still need hseen for the 0.6 file format code. */
391 #endif
392     HV *hseen;
393     AV *hook_seen;		/* which SVs were returned by STORABLE_freeze() */
394     AV *aseen;			/* which objects have been seen, retrieve time */
395     ntag_t where_is_undef;		/* index in aseen of PL_sv_undef */
396     HV *hclass;			/* which classnames have been seen, store time */
397     AV *aclass;			/* which classnames have been seen, retrieve time */
398     HV *hook;			/* cache for hook methods per class name */
399     IV tagnum;			/* incremented at store time for each seen object */
400     IV classnum;		/* incremented at store time for each seen classname */
401     int netorder;		/* true if network order used */
402     int s_tainted;		/* true if input source is tainted, at retrieve time */
403     int forgive_me;		/* whether to be forgiving... */
404     int deparse;		/* whether to deparse code refs */
405     SV *eval;			/* whether to eval source code */
406     int canonical;		/* whether to store hashes sorted by key */
407 #ifndef HAS_RESTRICTED_HASHES
408     int derestrict;		/* whether to downgrade restricted hashes */
409 #endif
410 #ifndef HAS_UTF8_ALL
411     int use_bytes;		/* whether to bytes-ify utf8 */
412 #endif
413     int accept_future_minor;	/* croak immediately on future minor versions?  */
414     int s_dirty;		/* context is dirty due to CROAK() -- can be cleaned */
415     int membuf_ro;		/* true means membuf is read-only and msaved is rw */
416     struct extendable keybuf;	/* for hash key retrieval */
417     struct extendable membuf;	/* for memory store/retrieve operations */
418     struct extendable msaved;	/* where potentially valid mbuf is saved */
419     PerlIO *fio;		/* where I/O are performed, NULL for memory */
420     int ver_major;		/* major of version for retrieved object */
421     int ver_minor;		/* minor of version for retrieved object */
422     SV *(**retrieve_vtbl)(pTHX_ struct stcxt *, const char *);	/* retrieve dispatch table */
423     SV *prev;			/* contexts chained backwards in real recursion */
424     SV *my_sv;			/* the blessed scalar who's SvPVX() I am */
425 
426     /* recur_sv:
427 
428        A hashref of hashrefs or arrayref of arrayrefs is actually a
429        chain of four SVs, eg for an array ref containing an array ref:
430 
431          RV -> AV (element) -> RV -> AV
432 
433        To make this depth appear natural from a perl level we only
434        want to count this as two levels, so store_ref() stores it's RV
435        into recur_sv and store_array()/store_hash() will only count
436        that level if the AV/HV *isn't* recur_sv.
437 
438        We can't just have store_hash()/store_array() not count that
439        level, since it's possible for XS code to store an AV or HV
440        directly as an element (though perl code trying to access such
441        an object will generally croak.)
442      */
443     SV *recur_sv;               /* check only one recursive SV */
444     int in_retrieve_overloaded; /* performance hack for retrieving overloaded objects */
445     int flags;			/* controls whether to bless or tie objects */
446     IV recur_depth;        	/* avoid stack overflows RT #97526 */
447     IV max_recur_depth;        /* limit for recur_depth */
448     IV max_recur_depth_hash;   /* limit for recur_depth for hashes */
449 #ifdef DEBUGME
450     int traceme;                /* TRACEME() produces output */
451 #endif
452 } stcxt_t;
453 
454 #define RECURSION_TOO_DEEP() \
455     (cxt->max_recur_depth != -1 && ++cxt->recur_depth > cxt->max_recur_depth)
456 
457 /* There's cases where we need to check whether the hash recursion
458    limit has been reached without bumping the recursion levels, so the
459    hash check doesn't bump the depth.
460 */
461 #define RECURSION_TOO_DEEP_HASH() \
462     (cxt->max_recur_depth_hash != -1 && cxt->recur_depth > cxt->max_recur_depth_hash)
463 #define MAX_DEPTH_ERROR "Max. recursion depth with nested structures exceeded"
464 
465 static int storable_free(pTHX_ SV *sv, MAGIC* mg);
466 
467 static MGVTBL vtbl_storable = {
468     NULL, /* get */
469     NULL, /* set */
470     NULL, /* len */
471     NULL, /* clear */
472     storable_free,
473 #ifdef MGf_COPY
474     NULL, /* copy */
475 #endif
476 #ifdef MGf_DUP
477     NULL, /* dup */
478 #endif
479 #ifdef MGf_LOCAL
480     NULL /* local */
481 #endif
482 };
483 
484 /* From Digest::MD5.  */
485 #ifndef sv_magicext
486 # define sv_magicext(sv, obj, type, vtbl, name, namlen)         \
487     THX_sv_magicext(aTHX_ sv, obj, type, vtbl, name, namlen)
THX_sv_magicext(pTHX_ SV * sv,SV * obj,int type,MGVTBL const * vtbl,char const * name,I32 namlen)488 static MAGIC *THX_sv_magicext(pTHX_
489 	SV *sv, SV *obj, int type,
490 	MGVTBL const *vtbl, char const *name, I32 namlen)
491 {
492     MAGIC *mg;
493     if (obj || namlen)
494         /* exceeded intended usage of this reserve implementation */
495         return NULL;
496     Newxz(mg, 1, MAGIC);
497     mg->mg_virtual = (MGVTBL*)vtbl;
498     mg->mg_type = type;
499     mg->mg_ptr = (char *)name;
500     mg->mg_len = -1;
501     (void) SvUPGRADE(sv, SVt_PVMG);
502     mg->mg_moremagic = SvMAGIC(sv);
503     SvMAGIC_set(sv, mg);
504     SvMAGICAL_off(sv);
505     mg_magical(sv);
506     return mg;
507 }
508 #endif
509 
510 #define NEW_STORABLE_CXT_OBJ(cxt)				\
511     STMT_START {						\
512         SV *self = newSV(sizeof(stcxt_t) - 1);                  \
513         SV *my_sv = newRV_noinc(self);                          \
514         sv_magicext(self, NULL, PERL_MAGIC_ext, &vtbl_storable, NULL, 0); \
515         cxt = (stcxt_t *)SvPVX(self);                           \
516         Zero(cxt, 1, stcxt_t);                                  \
517         cxt->my_sv = my_sv;                                     \
518     } STMT_END
519 
520 #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || defined(PERL_CAPI)
521 
522 #if PERL_VERSION_LT(5,4,68)
523 #define dSTCXT_SV                                               \
524     SV *perinterp_sv = get_sv(MY_VERSION, 0)
525 #else	/* >= perl5.004_68 */
526 #define dSTCXT_SV						\
527     SV *perinterp_sv = *hv_fetch(PL_modglobal,                  \
528 				 MY_VERSION, sizeof(MY_VERSION)-1, TRUE)
529 #endif	/* < perl5.004_68 */
530 
531 #define dSTCXT_PTR(T,name)					\
532     T name = ((perinterp_sv                                     \
533                && SvIOK(perinterp_sv) && SvIVX(perinterp_sv)    \
534                ? (T)SvPVX(SvRV(INT2PTR(SV*,SvIVX(perinterp_sv)))) : (T) 0))
535 #define dSTCXT					\
536     dSTCXT_SV;                                  \
537     dSTCXT_PTR(stcxt_t *, cxt)
538 
539 #define INIT_STCXT					\
540     dSTCXT;                                             \
541     NEW_STORABLE_CXT_OBJ(cxt);                          \
542     assert(perinterp_sv);				\
543     sv_setiv(perinterp_sv, PTR2IV(cxt->my_sv))
544 
545 #define SET_STCXT(x)					\
546     STMT_START {					\
547         dSTCXT_SV;                                      \
548         sv_setiv(perinterp_sv, PTR2IV(x->my_sv));       \
549     } STMT_END
550 
551 #else /* !MULTIPLICITY && !PERL_OBJECT && !PERL_CAPI */
552 
553 static stcxt_t *Context_ptr = NULL;
554 #define dSTCXT			stcxt_t *cxt = Context_ptr
555 #define SET_STCXT(x)		Context_ptr = x
556 #define INIT_STCXT				\
557     dSTCXT;                                     \
558     NEW_STORABLE_CXT_OBJ(cxt);                  \
559     SET_STCXT(cxt)
560 
561 
562 #endif /* MULTIPLICITY || PERL_OBJECT || PERL_CAPI */
563 
564 /*
565  * KNOWN BUG:
566  *   Croaking implies a memory leak, since we don't use setjmp/longjmp
567  *   to catch the exit and free memory used during store or retrieve
568  *   operations.  This is not too difficult to fix, but I need to understand
569  *   how Perl does it, and croaking is exceptional anyway, so I lack the
570  *   motivation to do it.
571  *
572  * The current workaround is to mark the context as dirty when croaking,
573  * so that data structures can be freed whenever we renter Storable code
574  * (but only *then*: it's a workaround, not a fix).
575  *
576  * This is also imperfect, because we don't really know how far they trapped
577  * the croak(), and when we were recursing, we won't be able to clean anything
578  * but the topmost context stacked.
579  */
580 
581 #define CROAK(x)	STMT_START { cxt->s_dirty = 1; croak x; } STMT_END
582 
583 /*
584  * End of "thread-safe" related definitions.
585  */
586 
587 /*
588  * LOW_32BITS
589  *
590  * Keep only the low 32 bits of a pointer (used for tags, which are not
591  * really pointers).
592  */
593 
594 #if PTRSIZE <= 4
595 #define LOW_32BITS(x)	((I32) (x))
596 #else
597 #define LOW_32BITS(x)	((I32) ((STRLEN) (x) & 0xffffffffUL))
598 #endif
599 
600 /*
601  * PTR2TAG(x)
602  *
603  * Convert a pointer into an ntag_t.
604  */
605 
606 #define PTR2TAG(x) ((ntag_t)(x))
607 
608 #define TAG2PTR(x, type) ((y)(x))
609 
610 /*
611  * oI, oS, oC
612  *
613  * Hack for Crays, where sizeof(I32) == 8, and which are big-endians.
614  * Used in the WLEN and RLEN macros.
615  */
616 
617 #if INTSIZE > 4
618 #define oI(x)	((I32 *) ((char *) (x) + 4))
619 #define oS(x)	((x) - 4)
620 #define oL(x)	(x)
621 #define oC(x)	(x = 0)
622 #define CRAY_HACK
623 #else
624 #define oI(x)	(x)
625 #define oS(x)	(x)
626 #define oL(x)	(x)
627 #define oC(x)
628 #endif
629 
630 /*
631  * key buffer handling
632  */
633 #define kbuf	(cxt->keybuf).arena
634 #define ksiz	(cxt->keybuf).asiz
635 #define KBUFINIT()							\
636     STMT_START {							\
637         if (!kbuf) {                                                    \
638             TRACEME(("** allocating kbuf of 128 bytes"));               \
639             New(10003, kbuf, 128, char);                                \
640             ksiz = 128;                                                 \
641         }                                                               \
642     } STMT_END
643 #define KBUFCHK(x)							\
644     STMT_START {							\
645         if (x >= ksiz) {                                                \
646             if (x >= I32_MAX)                                           \
647                 CROAK(("Too large size > I32_MAX"));                    \
648             TRACEME(("** extending kbuf to %d bytes (had %d)",          \
649                      (int)(x+1), (int)ksiz));                           \
650             Renew(kbuf, x+1, char);                                     \
651             ksiz = x+1;                                                 \
652         }                                                               \
653     } STMT_END
654 
655 /*
656  * memory buffer handling
657  */
658 #define mbase	(cxt->membuf).arena
659 #define msiz	(cxt->membuf).asiz
660 #define mptr	(cxt->membuf).aptr
661 #define mend	(cxt->membuf).aend
662 
663 #define MGROW	(1 << 13)
664 #define MMASK	(MGROW - 1)
665 
666 #define round_mgrow(x)	\
667     ((STRLEN) (((STRLEN) (x) + MMASK) & ~MMASK))
668 #define trunc_int(x)	\
669     ((STRLEN) ((STRLEN) (x) & ~(sizeof(int)-1)))
670 #define int_aligned(x)	\
671     ((STRLEN)(x) == trunc_int(x))
672 
673 #define MBUF_INIT(x)							\
674     STMT_START {							\
675         if (!mbase) {                                                   \
676             TRACEME(("** allocating mbase of %d bytes", MGROW));        \
677             New(10003, mbase, (int)MGROW, char);                        \
678             msiz = (STRLEN)MGROW;                                       \
679         }                                                               \
680         mptr = mbase;                                                   \
681         if (x)                                                          \
682             mend = mbase + x;                                           \
683         else                                                            \
684             mend = mbase + msiz;                                        \
685     } STMT_END
686 
687 #define MBUF_TRUNC(x)	mptr = mbase + x
688 #define MBUF_SIZE()	(mptr - mbase)
689 
690 /*
691  * MBUF_SAVE_AND_LOAD
692  * MBUF_RESTORE
693  *
694  * Those macros are used in do_retrieve() to save the current memory
695  * buffer into cxt->msaved, before MBUF_LOAD() can be used to retrieve
696  * data from a string.
697  */
698 #define MBUF_SAVE_AND_LOAD(in)						\
699     STMT_START {							\
700         ASSERT(!cxt->membuf_ro, ("mbase not already saved"));           \
701         cxt->membuf_ro = 1;                                             \
702         TRACEME(("saving mbuf"));                                       \
703         StructCopy(&cxt->membuf, &cxt->msaved, struct extendable);      \
704         MBUF_LOAD(in);                                                  \
705     } STMT_END
706 
707 #define MBUF_RESTORE()							\
708     STMT_START {							\
709         ASSERT(cxt->membuf_ro, ("mbase is read-only"));                 \
710         cxt->membuf_ro = 0;                                             \
711         TRACEME(("restoring mbuf"));                                    \
712         StructCopy(&cxt->msaved, &cxt->membuf, struct extendable);      \
713     } STMT_END
714 
715 /*
716  * Use SvPOKp(), because SvPOK() fails on tainted scalars.
717  * See store_scalar() for other usage of this workaround.
718  */
719 #define MBUF_LOAD(v)						\
720     STMT_START {						\
721         ASSERT(cxt->membuf_ro, ("mbase is read-only"));         \
722         if (!SvPOKp(v))                                         \
723             CROAK(("Not a scalar string"));                     \
724         mptr = mbase = SvPV(v, msiz);                           \
725         mend = mbase + msiz;                                    \
726     } STMT_END
727 
728 #define MBUF_XTEND(x)						\
729     STMT_START {						\
730         STRLEN nsz = (STRLEN) round_mgrow((x)+msiz);            \
731         STRLEN offset = mptr - mbase;                           \
732         ASSERT(!cxt->membuf_ro, ("mbase is not read-only"));    \
733         TRACEME(("** extending mbase from %lu to %lu bytes (wants %lu new)", \
734                  (unsigned long)msiz, (unsigned long)nsz, (unsigned long)(x)));  \
735         Renew(mbase, nsz, char);                                \
736         msiz = nsz;                                             \
737         mptr = mbase + offset;                                  \
738         mend = mbase + nsz;                                     \
739     } STMT_END
740 
741 #define MBUF_CHK(x)				\
742     STMT_START {				\
743         if ((mptr + (x)) > mend)                \
744             MBUF_XTEND(x);                      \
745     } STMT_END
746 
747 #define MBUF_GETC(x)				\
748     STMT_START {				\
749         if (mptr < mend)                        \
750             x = (int) (unsigned char) *mptr++;  \
751         else                                    \
752             return (SV *) 0;                    \
753     } STMT_END
754 
755 #ifdef CRAY_HACK
756 #define MBUF_GETINT(x)				\
757     STMT_START {				\
758         oC(x);                                  \
759         if ((mptr + 4) <= mend) {               \
760             memcpy(oI(&x), mptr, 4);            \
761             mptr += 4;                          \
762         } else                                  \
763             return (SV *) 0;                    \
764     } STMT_END
765 #else
766 #define MBUF_GETINT(x)				\
767     STMT_START {				\
768         if ((mptr + sizeof(int)) <= mend) {     \
769             if (int_aligned(mptr))              \
770                 x = *(int *) mptr;              \
771             else                                \
772                 memcpy(&x, mptr, sizeof(int));  \
773             mptr += sizeof(int);                \
774         } else                                  \
775             return (SV *) 0;                    \
776     } STMT_END
777 #endif
778 
779 #define MBUF_READ(x,s)				\
780     STMT_START {				\
781         if ((mptr + (s)) <= mend) {             \
782             memcpy(x, mptr, s);                 \
783             mptr += s;                          \
784         } else                                  \
785             return (SV *) 0;                    \
786     } STMT_END
787 
788 #define MBUF_SAFEREAD(x,s,z)			\
789     STMT_START {				\
790         if ((mptr + (s)) <= mend) {             \
791             memcpy(x, mptr, s);                 \
792             mptr += s;                          \
793         } else {                                \
794             sv_free(z);                         \
795             return (SV *) 0;                    \
796         }                                       \
797     } STMT_END
798 
799 #define MBUF_SAFEPVREAD(x,s,z)			\
800     STMT_START {				\
801         if ((mptr + (s)) <= mend) {             \
802             memcpy(x, mptr, s);                 \
803             mptr += s;                          \
804         } else {                                \
805             Safefree(z);                        \
806             return (SV *) 0;                    \
807         }                                       \
808     } STMT_END
809 
810 #define MBUF_PUTC(c)				\
811     STMT_START {				\
812         if (mptr < mend)                        \
813             *mptr++ = (char) c;                 \
814         else {                                  \
815             MBUF_XTEND(1);                      \
816             *mptr++ = (char) c;                 \
817         }                                       \
818     } STMT_END
819 
820 #ifdef CRAY_HACK
821 #define MBUF_PUTINT(i)				\
822     STMT_START {				\
823         MBUF_CHK(4);                            \
824         memcpy(mptr, oI(&i), 4);                \
825         mptr += 4;                              \
826     } STMT_END
827 #else
828 #define MBUF_PUTINT(i) 				\
829     STMT_START {				\
830         MBUF_CHK(sizeof(int));                  \
831         if (int_aligned(mptr))                  \
832             *(int *) mptr = i;                  \
833         else                                    \
834             memcpy(mptr, &i, sizeof(int));      \
835         mptr += sizeof(int);                    \
836     } STMT_END
837 #endif
838 
839 #define MBUF_PUTLONG(l)				\
840     STMT_START {				\
841         MBUF_CHK(8);                            \
842         memcpy(mptr, &l, 8);                    \
843         mptr += 8;                              \
844     } STMT_END
845 #define MBUF_WRITE(x,s)				\
846     STMT_START {				\
847         MBUF_CHK(s);                            \
848         memcpy(mptr, x, s);                     \
849         mptr += s;                              \
850     } STMT_END
851 
852 /*
853  * Possible return values for sv_type().
854  */
855 
856 #define svis_REF		0
857 #define svis_SCALAR		1
858 #define svis_ARRAY		2
859 #define svis_HASH		3
860 #define svis_TIED		4
861 #define svis_TIED_ITEM		5
862 #define svis_CODE		6
863 #define svis_REGEXP		7
864 #define svis_OTHER		8
865 
866 /*
867  * Flags for SX_HOOK.
868  */
869 
870 #define SHF_TYPE_MASK		0x03
871 #define SHF_LARGE_CLASSLEN	0x04
872 #define SHF_LARGE_STRLEN	0x08
873 #define SHF_LARGE_LISTLEN	0x10
874 #define SHF_IDX_CLASSNAME	0x20
875 #define SHF_NEED_RECURSE	0x40
876 #define SHF_HAS_LIST		0x80
877 
878 /*
879  * Types for SX_HOOK (last 2 bits in flags).
880  */
881 
882 #define SHT_SCALAR		0
883 #define SHT_ARRAY		1
884 #define SHT_HASH		2
885 #define SHT_EXTRA		3	/* Read extra byte for type */
886 
887 /*
888  * The following are held in the "extra byte"...
889  */
890 
891 #define SHT_TSCALAR		4	/* 4 + 0 -- tied scalar */
892 #define SHT_TARRAY		5	/* 4 + 1 -- tied array */
893 #define SHT_THASH		6	/* 4 + 2 -- tied hash */
894 
895 /*
896  * per hash flags for flagged hashes
897  */
898 
899 #define SHV_RESTRICTED		0x01
900 
901 /*
902  * per key flags for flagged hashes
903  */
904 
905 #define SHV_K_UTF8		0x01
906 #define SHV_K_WASUTF8		0x02
907 #define SHV_K_LOCKED		0x04
908 #define SHV_K_ISSV		0x08
909 #define SHV_K_PLACEHOLDER	0x10
910 
911 /*
912  * flags to allow blessing and/or tieing data the data we load
913  */
914 #define FLAG_BLESS_OK 2
915 #define FLAG_TIE_OK   4
916 
917 /*
918  * Flags for SX_REGEXP.
919  */
920 
921 #define SHR_U32_RE_LEN		0x01
922 
923 /*
924  * Before 0.6, the magic string was "perl-store" (binary version number 0).
925  *
926  * Since 0.6 introduced many binary incompatibilities, the magic string has
927  * been changed to "pst0" to allow an old image to be properly retrieved by
928  * a newer Storable, but ensure a newer image cannot be retrieved with an
929  * older version.
930  *
931  * At 0.7, objects are given the ability to serialize themselves, and the
932  * set of markers is extended, backward compatibility is not jeopardized,
933  * so the binary version number could have remained unchanged.  To correctly
934  * spot errors if a file making use of 0.7-specific extensions is given to
935  * 0.6 for retrieval, the binary version was moved to "2".  And I'm introducing
936  * a "minor" version, to better track this kind of evolution from now on.
937  *
938  */
939 static const char old_magicstr[] = "perl-store"; /* Magic number before 0.6 */
940 static const char magicstr[] = "pst0";		 /* Used as a magic number */
941 
942 #define MAGICSTR_BYTES  'p','s','t','0'
943 #define OLDMAGICSTR_BYTES  'p','e','r','l','-','s','t','o','r','e'
944 
945 /* 5.6.x introduced the ability to have IVs as long long.
946    However, Configure still defined BYTEORDER based on the size of a long.
947    Storable uses the BYTEORDER value as part of the header, but doesn't
948    explicitly store sizeof(IV) anywhere in the header.  Hence on 5.6.x built
949    with IV as long long on a platform that uses Configure (ie most things
950    except VMS and Windows) headers are identical for the different IV sizes,
951    despite the files containing some fields based on sizeof(IV)
952    Erk. Broken-ness.
953    5.8 is consistent - the following redefinition kludge is only needed on
954    5.6.x, but the interwork is needed on 5.8 while data survives in files
955    with the 5.6 header.
956 
957 */
958 
959 #if defined (IVSIZE) && (IVSIZE == 8) && (LONGSIZE == 4)
960 #ifndef NO_56_INTERWORK_KLUDGE
961 #define USE_56_INTERWORK_KLUDGE
962 #endif
963 #if BYTEORDER == 0x1234
964 #undef BYTEORDER
965 #define BYTEORDER 0x12345678
966 #else
967 #if BYTEORDER == 0x4321
968 #undef BYTEORDER
969 #define BYTEORDER 0x87654321
970 #endif
971 #endif
972 #endif
973 
974 #if BYTEORDER == 0x1234
975 #define BYTEORDER_BYTES  '1','2','3','4'
976 #else
977 #if BYTEORDER == 0x12345678
978 #define BYTEORDER_BYTES  '1','2','3','4','5','6','7','8'
979 #ifdef USE_56_INTERWORK_KLUDGE
980 #define BYTEORDER_BYTES_56  '1','2','3','4'
981 #endif
982 #else
983 #if BYTEORDER == 0x87654321
984 #define BYTEORDER_BYTES  '8','7','6','5','4','3','2','1'
985 #ifdef USE_56_INTERWORK_KLUDGE
986 #define BYTEORDER_BYTES_56  '4','3','2','1'
987 #endif
988 #else
989 #if BYTEORDER == 0x4321
990 #define BYTEORDER_BYTES  '4','3','2','1'
991 #else
992 #error Unknown byteorder. Please append your byteorder to Storable.xs
993 #endif
994 #endif
995 #endif
996 #endif
997 
998 #ifndef INT32_MAX
999 # define INT32_MAX 2147483647
1000 #endif
1001 #if IVSIZE > 4 && !defined(INT64_MAX)
1002 # define INT64_MAX 9223372036854775807LL
1003 #endif
1004 
1005 static const char byteorderstr[] = {BYTEORDER_BYTES, 0};
1006 #ifdef USE_56_INTERWORK_KLUDGE
1007 static const char byteorderstr_56[] = {BYTEORDER_BYTES_56, 0};
1008 #endif
1009 
1010 #define STORABLE_BIN_MAJOR	2		/* Binary major "version" */
1011 #define STORABLE_BIN_MINOR	11		/* Binary minor "version" */
1012 
1013 #if PERL_VERSION_LT(5,6,0)
1014 #define STORABLE_BIN_WRITE_MINOR	4
1015 #elif !defined (SvVOK)
1016 /*
1017  * Perl 5.6.0-5.8.0 can do weak references, but not vstring magic.
1018 */
1019 #define STORABLE_BIN_WRITE_MINOR	8
1020 #elif PERL_VERSION_GE(5,19,0)
1021 /* Perl 5.19 takes away the special meaning of PL_sv_undef in arrays. */
1022 /* With 3.x we added LOBJECT */
1023 #define STORABLE_BIN_WRITE_MINOR	11
1024 #else
1025 #define STORABLE_BIN_WRITE_MINOR	9
1026 #endif /* PERL_VERSION_LT(5,6,0) */
1027 
1028 #if PERL_VERSION_LT(5,8,1)
1029 #define PL_sv_placeholder PL_sv_undef
1030 #endif
1031 
1032 /*
1033  * Useful store shortcuts...
1034  */
1035 
1036 /*
1037  * Note that if you put more than one mark for storing a particular
1038  * type of thing, *and* in the retrieve_foo() function you mark both
1039  * the thingy's you get off with SEEN(), you *must* increase the
1040  * tagnum with cxt->tagnum++ along with this macro!
1041  *     - samv 20Jan04
1042  */
1043 #define PUTMARK(x) 					\
1044     STMT_START {					\
1045         if (!cxt->fio)                                  \
1046             MBUF_PUTC(x);                               \
1047         else if (PerlIO_putc(cxt->fio, x) == EOF)       \
1048             return -1;                                  \
1049     } STMT_END
1050 
1051 #define WRITE_I32(x)						\
1052     STMT_START {						\
1053         ASSERT(sizeof(x) == sizeof(I32), ("writing an I32"));   \
1054         if (!cxt->fio)                                          \
1055             MBUF_PUTINT(x);                                     \
1056         else if (PerlIO_write(cxt->fio, oI(&x),                 \
1057                               oS(sizeof(x))) != oS(sizeof(x)))  \
1058             return -1;                                          \
1059     } STMT_END
1060 
1061 #define WRITE_U64(x)							\
1062     STMT_START {							\
1063         ASSERT(sizeof(x) == sizeof(UV), ("writing an UV"));		\
1064         if (!cxt->fio)                                                  \
1065             MBUF_PUTLONG(x);                                            \
1066         else if (PerlIO_write(cxt->fio, oL(&x),                         \
1067                               oS(sizeof(x))) != oS(sizeof(x)))          \
1068             return -1;                                                  \
1069     } STMT_END
1070 
1071 #ifdef HAS_HTONL
1072 #define WLEN(x)                                                         \
1073     STMT_START {							\
1074         ASSERT(sizeof(x) == sizeof(int), ("WLEN writing an int"));      \
1075         if (cxt->netorder) {                                            \
1076             int y = (int) htonl(x);                                     \
1077             if (!cxt->fio)                                              \
1078                 MBUF_PUTINT(y);                                         \
1079             else if (PerlIO_write(cxt->fio,oI(&y),oS(sizeof(y))) != oS(sizeof(y))) \
1080                 return -1;                                              \
1081         } else {                                                        \
1082             if (!cxt->fio)                                              \
1083                 MBUF_PUTINT(x);                                         \
1084             else if (PerlIO_write(cxt->fio,oI(&x),                      \
1085                                   oS(sizeof(x))) != oS(sizeof(x)))      \
1086                 return -1;                                              \
1087         }                                                               \
1088     } STMT_END
1089 
1090 #  ifdef HAS_U64
1091 
1092 #define W64LEN(x)							\
1093     STMT_START {							\
1094         ASSERT(sizeof(x) == 8, ("W64LEN writing a U64"));               \
1095         if (cxt->netorder) {                                            \
1096             U32 buf[2];      						\
1097             buf[1] = htonl(x & 0xffffffffUL);                           \
1098             buf[0] = htonl(x >> 32);                                    \
1099             if (!cxt->fio)                                              \
1100                 MBUF_PUTLONG(buf);                                      \
1101             else if (PerlIO_write(cxt->fio, buf,                        \
1102                                   sizeof(buf)) != sizeof(buf))          \
1103                 return -1;                                              \
1104         } else {                                                        \
1105             if (!cxt->fio)                                              \
1106                 MBUF_PUTLONG(x);                                        \
1107             else if (PerlIO_write(cxt->fio,oI(&x),                      \
1108                                   oS(sizeof(x))) != oS(sizeof(x)))      \
1109                 return -1;                                              \
1110         }                                                               \
1111     } STMT_END
1112 
1113 #  else
1114 
1115 #define W64LEN(x) CROAK(("No 64bit UVs"))
1116 
1117 #  endif
1118 
1119 #else
1120 #define WLEN(x)	WRITE_I32(x)
1121 #ifdef HAS_U64
1122 #define W64LEN(x) WRITE_U64(x)
1123 #else
1124 #define W64LEN(x) CROAK(("no 64bit UVs"))
1125 #endif
1126 #endif
1127 
1128 #define WRITE(x,y) 							\
1129     STMT_START {							\
1130         if (!cxt->fio)                                                  \
1131             MBUF_WRITE(x,y);                                            \
1132         else if (PerlIO_write(cxt->fio, x, y) != (SSize_t)y)            \
1133             return -1;                                                  \
1134     } STMT_END
1135 
1136 #define STORE_PV_LEN(pv, len, small, large)			\
1137     STMT_START {						\
1138         if (len <= LG_SCALAR) {                                 \
1139             int ilen = (int) len;                               \
1140             unsigned char clen = (unsigned char) len;           \
1141             PUTMARK(small);                                     \
1142             PUTMARK(clen);                                      \
1143             if (len)                                            \
1144                 WRITE(pv, ilen);                                \
1145         } else if (sizeof(len) > 4 && len > INT32_MAX) {        \
1146             PUTMARK(SX_LOBJECT);                                \
1147             PUTMARK(large);                                     \
1148             W64LEN(len);                                        \
1149             WRITE(pv, len);                                     \
1150         } else {                                                \
1151             int ilen = (int) len;                               \
1152             PUTMARK(large);                                     \
1153             WLEN(ilen);                                         \
1154             WRITE(pv, ilen);                                    \
1155         }                                                       \
1156     } STMT_END
1157 
1158 #define STORE_SCALAR(pv, len)	STORE_PV_LEN(pv, len, SX_SCALAR, SX_LSCALAR)
1159 
1160 /*
1161  * Store &PL_sv_undef in arrays without recursing through store().  We
1162  * actually use this to represent nonexistent elements, for historical
1163  * reasons.
1164  */
1165 #define STORE_SV_UNDEF() 					\
1166     STMT_START {                                                \
1167 	cxt->tagnum++;						\
1168 	PUTMARK(SX_SV_UNDEF);					\
1169     } STMT_END
1170 
1171 /*
1172  * Useful retrieve shortcuts...
1173  */
1174 
1175 #define GETCHAR() \
1176     (cxt->fio ? PerlIO_getc(cxt->fio)                   \
1177               : (mptr >= mend ? EOF : (int) *mptr++))
1178 
1179 #define GETMARK(x)							\
1180     STMT_START {							\
1181         if (!cxt->fio)                                                  \
1182             MBUF_GETC(x);                                               \
1183         else if ((int) (x = PerlIO_getc(cxt->fio)) == EOF)              \
1184             return (SV *) 0;                                            \
1185     } STMT_END
1186 
1187 #define READ_I32(x)							\
1188     STMT_START {							\
1189         ASSERT(sizeof(x) == sizeof(I32), ("reading an I32"));           \
1190         oC(x);                                                          \
1191         if (!cxt->fio)                                                  \
1192             MBUF_GETINT(x);                                             \
1193         else if (PerlIO_read(cxt->fio, oI(&x),                          \
1194                                  oS(sizeof(x))) != oS(sizeof(x)))       \
1195             return (SV *) 0;                                            \
1196     } STMT_END
1197 
1198 #ifdef HAS_NTOHL
1199 #define RLEN(x)                                                         \
1200     STMT_START {							\
1201         oC(x);                                                          \
1202         if (!cxt->fio)                                                  \
1203             MBUF_GETINT(x);                                             \
1204         else if (PerlIO_read(cxt->fio, oI(&x),                          \
1205                                  oS(sizeof(x))) != oS(sizeof(x)))       \
1206             return (SV *) 0;                                            \
1207         if (cxt->netorder)                                              \
1208             x = (int) ntohl(x);                                         \
1209     } STMT_END
1210 #else
1211 #define RLEN(x) READ_I32(x)
1212 #endif
1213 
1214 #define READ(x,y) 							\
1215     STMT_START {							\
1216 	if (!cxt->fio)							\
1217             MBUF_READ(x, y);                                            \
1218 	else if (PerlIO_read(cxt->fio, x, y) != (SSize_t)y)             \
1219             return (SV *) 0;                                            \
1220     } STMT_END
1221 
1222 #define SAFEREAD(x,y,z)                                                 \
1223     STMT_START {							\
1224         if (!cxt->fio)                                                  \
1225             MBUF_SAFEREAD(x,y,z);                                       \
1226         else if (PerlIO_read(cxt->fio, x, y) != (SSize_t)y) {           \
1227             sv_free(z);                                                 \
1228             return (SV *) 0;                                            \
1229         }                                                               \
1230     } STMT_END
1231 
1232 #define SAFEPVREAD(x,y,z)					\
1233     STMT_START {						\
1234         if (!cxt->fio)                                          \
1235             MBUF_SAFEPVREAD(x,y,z);                             \
1236         else if (PerlIO_read(cxt->fio, x, y) != y) {            \
1237             Safefree(z);                                        \
1238             return (SV *) 0;                                    \
1239         }                                                       \
1240     } STMT_END
1241 
1242 #ifdef HAS_U64
1243 
1244 #  if defined(HAS_NTOHL)
1245 #    define Sntohl(x) ntohl(x)
1246 #  elif BYTEORDER == 0x87654321 || BYTEORDER == 0x4321
1247 #    define Sntohl(x) (x)
1248 #  else
Sntohl(U32 x)1249 static U32 Sntohl(U32 x) {
1250     return ((x & 0xFF) << 24) + ((x * 0xFF00) << 8)
1251 	+ ((x & 0xFF0000) >> 8) + ((x & 0xFF000000) >> 24);
1252 }
1253 #  endif
1254 
1255 #  define READ_U64(x)                                                       \
1256     STMT_START {                                                          \
1257 	ASSERT(sizeof(x) == 8, ("R64LEN reading a U64"));                 \
1258 	if (cxt->netorder) {                                              \
1259 	    U32 buf[2];                                                   \
1260 	    READ((void *)buf, sizeof(buf));                               \
1261 	    (x) = ((UV)Sntohl(buf[0]) << 32) + Sntohl(buf[1]);		\
1262 	}                                                                 \
1263 	else {                                                            \
1264 	    READ(&(x), sizeof(x));                                        \
1265 	}                                                                 \
1266     } STMT_END
1267 
1268 #endif
1269 
1270 /*
1271  * SEEN() is used at retrieve time, to remember where object 'y', bearing a
1272  * given tag 'tagnum', has been retrieved. Next time we see an SX_OBJECT marker,
1273  * we'll therefore know where it has been retrieved and will be able to
1274  * share the same reference, as in the original stored memory image.
1275  *
1276  * We also need to bless objects ASAP for hooks (which may compute "ref $x"
1277  * on the objects given to STORABLE_thaw and expect that to be defined), and
1278  * also for overloaded objects (for which we might not find the stash if the
1279  * object is not blessed yet--this might occur for overloaded objects that
1280  * refer to themselves indirectly: if we blessed upon return from a sub
1281  * retrieve(), the SX_OBJECT marker we'd found could not have overloading
1282  * restored on it because the underlying object would not be blessed yet!).
1283  *
1284  * To achieve that, the class name of the last retrieved object is passed down
1285  * recursively, and the first SEEN() call for which the class name is not NULL
1286  * will bless the object.
1287  *
1288  * i should be true iff sv is immortal (ie PL_sv_yes, PL_sv_no or PL_sv_undef)
1289  *
1290  * SEEN0() is a short-cut where stash is always NULL.
1291  *
1292  * The _NN variants dont check for y being null
1293  */
1294 #define SEEN0_NN(y,i)							\
1295     STMT_START {							\
1296         if (av_store(cxt->aseen, cxt->tagnum++, i ? (SV*)(y)            \
1297                      : SvREFCNT_inc(y)) == 0)                           \
1298             return (SV *) 0;                                            \
1299         TRACEME(("aseen(#%d) = 0x%" UVxf " (refcnt=%d)",                \
1300                  (int)cxt->tagnum-1,                                    \
1301                  PTR2UV(y), (int)SvREFCNT(y)-1));                       \
1302     } STMT_END
1303 
1304 #define SEEN0(y,i)							\
1305     STMT_START {							\
1306         if (!y)                                                         \
1307             return (SV *) 0;                                            \
1308         SEEN0_NN(y,i);                                                  \
1309     } STMT_END
1310 
1311 #define SEEN_NN(y,stash,i)						\
1312     STMT_START {							\
1313         SEEN0_NN(y,i);                                                  \
1314         if (stash)							\
1315             BLESS((SV *)(y), (HV *)(stash));                            \
1316     } STMT_END
1317 
1318 #define SEEN(y,stash,i)							\
1319     STMT_START {                                                	\
1320         if (!y)                                                         \
1321             return (SV *) 0;                                            \
1322         SEEN_NN(y,stash, i);                                            \
1323     } STMT_END
1324 
1325 /*
1326  * Bless 's' in 'p', via a temporary reference, required by sv_bless().
1327  * "A" magic is added before the sv_bless for overloaded classes, this avoids
1328  * an expensive call to S_reset_amagic in sv_bless.
1329  */
1330 #define BLESS(s,stash)							\
1331     STMT_START {							\
1332         SV *ref;                                                        \
1333         if (cxt->flags & FLAG_BLESS_OK) {                               \
1334             TRACEME(("blessing 0x%" UVxf " in %s", PTR2UV(s),           \
1335                      HvNAME_get(stash)));                               \
1336             ref = newRV_noinc(s);                                       \
1337             if (cxt->in_retrieve_overloaded && Gv_AMG(stash)) {         \
1338                 cxt->in_retrieve_overloaded = 0;                        \
1339                 SvAMAGIC_on(ref);                                       \
1340             }                                                           \
1341             (void) sv_bless(ref, stash);                                \
1342             SvRV_set(ref, NULL);                                        \
1343             SvREFCNT_dec(ref);                                          \
1344         }                                                               \
1345         else {                                                          \
1346             TRACEME(("not blessing 0x%" UVxf " in %s", PTR2UV(s),       \
1347                      (HvNAME_get(stash))));                             \
1348         }                                                               \
1349     } STMT_END
1350 /*
1351  * sort (used in store_hash) - conditionally use qsort when
1352  * sortsv is not available ( <= 5.6.1 ).
1353  */
1354 
1355 #if PERL_VERSION_LT(5,7,0)
1356 
1357 #if defined(USE_ITHREADS)
1358 
1359 #define STORE_HASH_SORT						\
1360     ENTER; {                                                    \
1361         PerlInterpreter *orig_perl = PERL_GET_CONTEXT;          \
1362         SAVESPTR(orig_perl);                                    \
1363         PERL_SET_CONTEXT(aTHX);                                 \
1364         qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp);\
1365     } LEAVE;
1366 
1367 #else /* ! USE_ITHREADS */
1368 
1369 #define STORE_HASH_SORT					\
1370     qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp);
1371 
1372 #endif  /* USE_ITHREADS */
1373 
1374 #else /* PERL >= 5.7.0 */
1375 
1376 #define STORE_HASH_SORT \
1377     sortsv(AvARRAY(av), len, Perl_sv_cmp);
1378 
1379 #endif /* PERL_VERSION_LT(5,7,0) */
1380 
1381 static int store(pTHX_ stcxt_t *cxt, SV *sv);
1382 static SV *retrieve(pTHX_ stcxt_t *cxt, const char *cname);
1383 
1384 #define UNSEE()			\
1385     STMT_START {			\
1386         av_pop(cxt->aseen);             \
1387         cxt->tagnum--;                  \
1388     } STMT_END
1389 
1390 /*
1391  * Dynamic dispatching table for SV store.
1392  */
1393 
1394 static int store_ref(pTHX_ stcxt_t *cxt, SV *sv);
1395 static int store_scalar(pTHX_ stcxt_t *cxt, SV *sv);
1396 static int store_array(pTHX_ stcxt_t *cxt, AV *av);
1397 static int store_hash(pTHX_ stcxt_t *cxt, HV *hv);
1398 static int store_tied(pTHX_ stcxt_t *cxt, SV *sv);
1399 static int store_tied_item(pTHX_ stcxt_t *cxt, SV *sv);
1400 static int store_code(pTHX_ stcxt_t *cxt, CV *cv);
1401 static int store_regexp(pTHX_ stcxt_t *cxt, SV *sv);
1402 static int store_other(pTHX_ stcxt_t *cxt, SV *sv);
1403 static int store_blessed(pTHX_ stcxt_t *cxt, SV *sv, int type, HV *pkg);
1404 
1405 typedef int (*sv_store_t)(pTHX_ stcxt_t *cxt, SV *sv);
1406 
1407 static const sv_store_t sv_store[] = {
1408     (sv_store_t)store_ref,	/* svis_REF */
1409     (sv_store_t)store_scalar,	/* svis_SCALAR */
1410     (sv_store_t)store_array,	/* svis_ARRAY */
1411     (sv_store_t)store_hash,	/* svis_HASH */
1412     (sv_store_t)store_tied,	/* svis_TIED */
1413     (sv_store_t)store_tied_item,/* svis_TIED_ITEM */
1414     (sv_store_t)store_code,	/* svis_CODE */
1415     (sv_store_t)store_regexp,	/* svis_REGEXP */
1416     (sv_store_t)store_other,	/* svis_OTHER */
1417 };
1418 
1419 #define SV_STORE(x)	(*sv_store[x])
1420 
1421 /*
1422  * Dynamic dispatching tables for SV retrieval.
1423  */
1424 
1425 static SV *retrieve_lscalar(pTHX_ stcxt_t *cxt, const char *cname);
1426 static SV *retrieve_lutf8str(pTHX_ stcxt_t *cxt, const char *cname);
1427 static SV *old_retrieve_array(pTHX_ stcxt_t *cxt, const char *cname);
1428 static SV *old_retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname);
1429 static SV *retrieve_ref(pTHX_ stcxt_t *cxt, const char *cname);
1430 static SV *retrieve_undef(pTHX_ stcxt_t *cxt, const char *cname);
1431 static SV *retrieve_integer(pTHX_ stcxt_t *cxt, const char *cname);
1432 static SV *retrieve_double(pTHX_ stcxt_t *cxt, const char *cname);
1433 static SV *retrieve_byte(pTHX_ stcxt_t *cxt, const char *cname);
1434 static SV *retrieve_netint(pTHX_ stcxt_t *cxt, const char *cname);
1435 static SV *retrieve_scalar(pTHX_ stcxt_t *cxt, const char *cname);
1436 static SV *retrieve_utf8str(pTHX_ stcxt_t *cxt, const char *cname);
1437 static SV *retrieve_tied_array(pTHX_ stcxt_t *cxt, const char *cname);
1438 static SV *retrieve_tied_hash(pTHX_ stcxt_t *cxt, const char *cname);
1439 static SV *retrieve_tied_scalar(pTHX_ stcxt_t *cxt, const char *cname);
1440 static SV *retrieve_other(pTHX_ stcxt_t *cxt, const char *cname);
1441 static SV *retrieve_lobject(pTHX_ stcxt_t *cxt, const char *cname);
1442 static SV *retrieve_regexp(pTHX_ stcxt_t *cxt, const char *cname);
1443 
1444 /* helpers for U64 lobjects */
1445 
1446 static SV *get_lstring(pTHX_ stcxt_t *cxt, UV len, int isutf8, const char *cname);
1447 #ifdef HAS_U64
1448 static SV *get_larray(pTHX_ stcxt_t *cxt, UV len, const char *cname);
1449 static SV *get_lhash(pTHX_ stcxt_t *cxt, UV len, int hash_flags, const char *cname);
1450 static int store_lhash(pTHX_ stcxt_t *cxt, HV *hv, unsigned char hash_flags);
1451 #endif
1452 static int store_hentry(pTHX_ stcxt_t *cxt, HV* hv, UV i, HE *he, unsigned char hash_flags);
1453 
1454 typedef SV* (*sv_retrieve_t)(pTHX_ stcxt_t *cxt, const char *name);
1455 
1456 static const sv_retrieve_t sv_old_retrieve[] = {
1457     0,					/* SX_OBJECT -- entry unused dynamically */
1458     (sv_retrieve_t)retrieve_lscalar,	/* SX_LSCALAR */
1459     (sv_retrieve_t)old_retrieve_array,	/* SX_ARRAY -- for pre-0.6 binaries */
1460     (sv_retrieve_t)old_retrieve_hash,	/* SX_HASH -- for pre-0.6 binaries */
1461     (sv_retrieve_t)retrieve_ref,	/* SX_REF */
1462     (sv_retrieve_t)retrieve_undef,	/* SX_UNDEF */
1463     (sv_retrieve_t)retrieve_integer,	/* SX_INTEGER */
1464     (sv_retrieve_t)retrieve_double,	/* SX_DOUBLE */
1465     (sv_retrieve_t)retrieve_byte,	/* SX_BYTE */
1466     (sv_retrieve_t)retrieve_netint,	/* SX_NETINT */
1467     (sv_retrieve_t)retrieve_scalar,	/* SX_SCALAR */
1468     (sv_retrieve_t)retrieve_tied_array,	/* SX_TIED_ARRAY */
1469     (sv_retrieve_t)retrieve_tied_hash,	/* SX_TIED_HASH */
1470     (sv_retrieve_t)retrieve_tied_scalar,/* SX_TIED_SCALAR */
1471     (sv_retrieve_t)retrieve_other,	/* SX_SV_UNDEF not supported */
1472     (sv_retrieve_t)retrieve_other,	/* SX_SV_YES not supported */
1473     (sv_retrieve_t)retrieve_other,	/* SX_SV_NO not supported */
1474     (sv_retrieve_t)retrieve_other,	/* SX_BLESS not supported */
1475     (sv_retrieve_t)retrieve_other,	/* SX_IX_BLESS not supported */
1476     (sv_retrieve_t)retrieve_other,	/* SX_HOOK not supported */
1477     (sv_retrieve_t)retrieve_other,	/* SX_OVERLOADED not supported */
1478     (sv_retrieve_t)retrieve_other,	/* SX_TIED_KEY not supported */
1479     (sv_retrieve_t)retrieve_other,	/* SX_TIED_IDX not supported */
1480     (sv_retrieve_t)retrieve_other,	/* SX_UTF8STR not supported */
1481     (sv_retrieve_t)retrieve_other,	/* SX_LUTF8STR not supported */
1482     (sv_retrieve_t)retrieve_other,	/* SX_FLAG_HASH not supported */
1483     (sv_retrieve_t)retrieve_other,	/* SX_CODE not supported */
1484     (sv_retrieve_t)retrieve_other,	/* SX_WEAKREF not supported */
1485     (sv_retrieve_t)retrieve_other,	/* SX_WEAKOVERLOAD not supported */
1486     (sv_retrieve_t)retrieve_other,	/* SX_VSTRING not supported */
1487     (sv_retrieve_t)retrieve_other,	/* SX_LVSTRING not supported */
1488     (sv_retrieve_t)retrieve_other,	/* SX_SVUNDEF_ELEM not supported */
1489     (sv_retrieve_t)retrieve_other,	/* SX_REGEXP */
1490     (sv_retrieve_t)retrieve_other,  	/* SX_LOBJECT not supported */
1491     (sv_retrieve_t)retrieve_other,  	/* SX_LAST */
1492 };
1493 
1494 static SV *retrieve_hook_common(pTHX_ stcxt_t *cxt, const char *cname, int large);
1495 
1496 static SV *retrieve_array(pTHX_ stcxt_t *cxt, const char *cname);
1497 static SV *retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname);
1498 static SV *retrieve_sv_undef(pTHX_ stcxt_t *cxt, const char *cname);
1499 static SV *retrieve_sv_yes(pTHX_ stcxt_t *cxt, const char *cname);
1500 static SV *retrieve_sv_no(pTHX_ stcxt_t *cxt, const char *cname);
1501 static SV *retrieve_blessed(pTHX_ stcxt_t *cxt, const char *cname);
1502 static SV *retrieve_idx_blessed(pTHX_ stcxt_t *cxt, const char *cname);
1503 static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname);
1504 static SV *retrieve_overloaded(pTHX_ stcxt_t *cxt, const char *cname);
1505 static SV *retrieve_tied_key(pTHX_ stcxt_t *cxt, const char *cname);
1506 static SV *retrieve_tied_idx(pTHX_ stcxt_t *cxt, const char *cname);
1507 static SV *retrieve_flag_hash(pTHX_ stcxt_t *cxt, const char *cname);
1508 static SV *retrieve_code(pTHX_ stcxt_t *cxt, const char *cname);
1509 static SV *retrieve_weakref(pTHX_ stcxt_t *cxt, const char *cname);
1510 static SV *retrieve_weakoverloaded(pTHX_ stcxt_t *cxt, const char *cname);
1511 static SV *retrieve_vstring(pTHX_ stcxt_t *cxt, const char *cname);
1512 static SV *retrieve_lvstring(pTHX_ stcxt_t *cxt, const char *cname);
1513 static SV *retrieve_svundef_elem(pTHX_ stcxt_t *cxt, const char *cname);
1514 
1515 static const sv_retrieve_t sv_retrieve[] = {
1516     0,					/* SX_OBJECT -- entry unused dynamically */
1517     (sv_retrieve_t)retrieve_lscalar,	/* SX_LSCALAR */
1518     (sv_retrieve_t)retrieve_array,	/* SX_ARRAY */
1519     (sv_retrieve_t)retrieve_hash,	/* SX_HASH */
1520     (sv_retrieve_t)retrieve_ref,	/* SX_REF */
1521     (sv_retrieve_t)retrieve_undef,	/* SX_UNDEF */
1522     (sv_retrieve_t)retrieve_integer,	/* SX_INTEGER */
1523     (sv_retrieve_t)retrieve_double,	/* SX_DOUBLE */
1524     (sv_retrieve_t)retrieve_byte,	/* SX_BYTE */
1525     (sv_retrieve_t)retrieve_netint,	/* SX_NETINT */
1526     (sv_retrieve_t)retrieve_scalar,	/* SX_SCALAR */
1527     (sv_retrieve_t)retrieve_tied_array,	/* SX_TIED_ARRAY */
1528     (sv_retrieve_t)retrieve_tied_hash,	/* SX_TIED_HASH */
1529     (sv_retrieve_t)retrieve_tied_scalar,/* SX_TIED_SCALAR */
1530     (sv_retrieve_t)retrieve_sv_undef,	/* SX_SV_UNDEF */
1531     (sv_retrieve_t)retrieve_sv_yes,	/* SX_SV_YES */
1532     (sv_retrieve_t)retrieve_sv_no,	/* SX_SV_NO */
1533     (sv_retrieve_t)retrieve_blessed,	/* SX_BLESS */
1534     (sv_retrieve_t)retrieve_idx_blessed,/* SX_IX_BLESS */
1535     (sv_retrieve_t)retrieve_hook,	/* SX_HOOK */
1536     (sv_retrieve_t)retrieve_overloaded,	/* SX_OVERLOAD */
1537     (sv_retrieve_t)retrieve_tied_key,	/* SX_TIED_KEY */
1538     (sv_retrieve_t)retrieve_tied_idx,	/* SX_TIED_IDX */
1539     (sv_retrieve_t)retrieve_utf8str,	/* SX_UTF8STR  */
1540     (sv_retrieve_t)retrieve_lutf8str,	/* SX_LUTF8STR */
1541     (sv_retrieve_t)retrieve_flag_hash,	/* SX_HASH */
1542     (sv_retrieve_t)retrieve_code,	/* SX_CODE */
1543     (sv_retrieve_t)retrieve_weakref,	/* SX_WEAKREF */
1544     (sv_retrieve_t)retrieve_weakoverloaded,/* SX_WEAKOVERLOAD */
1545     (sv_retrieve_t)retrieve_vstring,	/* SX_VSTRING */
1546     (sv_retrieve_t)retrieve_lvstring,	/* SX_LVSTRING */
1547     (sv_retrieve_t)retrieve_svundef_elem,/* SX_SVUNDEF_ELEM */
1548     (sv_retrieve_t)retrieve_regexp,	/* SX_REGEXP */
1549     (sv_retrieve_t)retrieve_lobject,	/* SX_LOBJECT */
1550     (sv_retrieve_t)retrieve_other,  	/* SX_LAST */
1551 };
1552 
1553 #define RETRIEVE(c,x) ((x) >= SX_LAST ? retrieve_other : *(c)->retrieve_vtbl[x])
1554 
1555 static SV *mbuf2sv(pTHX);
1556 
1557 /***
1558  *** Context management.
1559  ***/
1560 
1561 /*
1562  * init_perinterp
1563  *
1564  * Called once per "thread" (interpreter) to initialize some global context.
1565  */
init_perinterp(pTHX)1566 static void init_perinterp(pTHX)
1567 {
1568     INIT_STCXT;
1569     INIT_TRACEME;
1570     cxt->netorder = 0;		/* true if network order used */
1571     cxt->forgive_me = -1;	/* whether to be forgiving... */
1572     cxt->accept_future_minor = -1; /* would otherwise occur too late */
1573 }
1574 
1575 /*
1576  * reset_context
1577  *
1578  * Called at the end of every context cleaning, to perform common reset
1579  * operations.
1580  */
reset_context(stcxt_t * cxt)1581 static void reset_context(stcxt_t *cxt)
1582 {
1583     cxt->entry = 0;
1584     cxt->s_dirty = 0;
1585     cxt->recur_sv = NULL;
1586     cxt->recur_depth = 0;
1587     cxt->optype &= ~(ST_STORE|ST_RETRIEVE);	/* Leave ST_CLONE alone */
1588 }
1589 
1590 /*
1591  * init_store_context
1592  *
1593  * Initialize a new store context for real recursion.
1594  */
init_store_context(pTHX_ stcxt_t * cxt,PerlIO * f,int optype,int network_order)1595 static void init_store_context(pTHX_
1596 	stcxt_t *cxt,
1597         PerlIO *f,
1598         int optype,
1599         int network_order)
1600 {
1601     INIT_TRACEME;
1602 
1603     TRACEME(("init_store_context"));
1604 
1605     cxt->netorder = network_order;
1606     cxt->forgive_me = -1;		/* Fetched from perl if needed */
1607     cxt->deparse = -1;			/* Idem */
1608     cxt->eval = NULL;			/* Idem */
1609     cxt->canonical = -1;		/* Idem */
1610     cxt->tagnum = -1;			/* Reset tag numbers */
1611     cxt->classnum = -1;			/* Reset class numbers */
1612     cxt->fio = f;			/* Where I/O are performed */
1613     cxt->optype = optype;		/* A store, or a deep clone */
1614     cxt->entry = 1;			/* No recursion yet */
1615 
1616     /*
1617      * The 'hseen' table is used to keep track of each SV stored and their
1618      * associated tag numbers is special. It is "abused" because the
1619      * values stored are not real SV, just integers cast to (SV *),
1620      * which explains the freeing below.
1621      *
1622      * It is also one possible bottleneck to achieve good storing speed,
1623      * so the "shared keys" optimization is turned off (unlikely to be
1624      * of any use here), and the hash table is "pre-extended". Together,
1625      * those optimizations increase the throughput by 12%.
1626      */
1627 
1628 #ifdef USE_PTR_TABLE
1629     cxt->pseen = ptr_table_new();
1630     cxt->hseen = 0;
1631 #else
1632     cxt->hseen = newHV();	/* Table where seen objects are stored */
1633     HvSHAREKEYS_off(cxt->hseen);
1634 #endif
1635     /*
1636      * The following does not work well with perl5.004_04, and causes
1637      * a core dump later on, in a completely unrelated spot, which
1638      * makes me think there is a memory corruption going on.
1639      *
1640      * Calling hv_ksplit(hseen, HBUCKETS) instead of manually hacking
1641      * it below does not make any difference. It seems to work fine
1642      * with perl5.004_68 but given the probable nature of the bug,
1643      * that does not prove anything.
1644      *
1645      * It's a shame because increasing the amount of buckets raises
1646      * store() throughput by 5%, but until I figure this out, I can't
1647      * allow for this to go into production.
1648      *
1649      * It is reported fixed in 5.005, hence the #if.
1650      */
1651 #if PERL_VERSION_GE(5,5,0)
1652 #define HBUCKETS	4096		/* Buckets for %hseen */
1653 #ifndef USE_PTR_TABLE
1654     HvMAX(cxt->hseen) = HBUCKETS - 1;	/* keys %hseen = $HBUCKETS; */
1655 #endif
1656 #endif
1657 
1658     /*
1659      * The 'hclass' hash uses the same settings as 'hseen' above, but it is
1660      * used to assign sequential tags (numbers) to class names for blessed
1661      * objects.
1662      *
1663      * We turn the shared key optimization on.
1664      */
1665 
1666     cxt->hclass = newHV();		/* Where seen classnames are stored */
1667 
1668 #if PERL_VERSION_GE(5,5,0)
1669     HvMAX(cxt->hclass) = HBUCKETS - 1;	/* keys %hclass = $HBUCKETS; */
1670 #endif
1671 
1672     /*
1673      * The 'hook' hash table is used to keep track of the references on
1674      * the STORABLE_freeze hook routines, when found in some class name.
1675      *
1676      * It is assumed that the inheritance tree will not be changed during
1677      * storing, and that no new method will be dynamically created by the
1678      * hooks.
1679      */
1680 
1681     cxt->hook = newHV();		/* Table where hooks are cached */
1682 
1683     /*
1684      * The 'hook_seen' array keeps track of all the SVs returned by
1685      * STORABLE_freeze hooks for us to serialize, so that they are not
1686      * reclaimed until the end of the serialization process.  Each SV is
1687      * only stored once, the first time it is seen.
1688      */
1689 
1690     cxt->hook_seen = newAV(); /* Lists SVs returned by STORABLE_freeze */
1691 
1692     cxt->max_recur_depth = SvIV(get_sv("Storable::recursion_limit", GV_ADD));
1693     cxt->max_recur_depth_hash = SvIV(get_sv("Storable::recursion_limit_hash", GV_ADD));
1694 }
1695 
1696 /*
1697  * clean_store_context
1698  *
1699  * Clean store context by
1700  */
clean_store_context(pTHX_ stcxt_t * cxt)1701 static void clean_store_context(pTHX_ stcxt_t *cxt)
1702 {
1703     HE *he;
1704 
1705     TRACEMED(("clean_store_context"));
1706 
1707     ASSERT(cxt->optype & ST_STORE, ("was performing a store()"));
1708 
1709     /*
1710      * Insert real values into hashes where we stored faked pointers.
1711      */
1712 
1713 #ifndef USE_PTR_TABLE
1714     if (cxt->hseen) {
1715         hv_iterinit(cxt->hseen);
1716         while ((he = hv_iternext(cxt->hseen)))	/* Extra () for -Wall */
1717             HeVAL(he) = &PL_sv_undef;
1718     }
1719 #endif
1720 
1721     if (cxt->hclass) {
1722         hv_iterinit(cxt->hclass);
1723         while ((he = hv_iternext(cxt->hclass))) /* Extra () for -Wall */
1724             HeVAL(he) = &PL_sv_undef;
1725     }
1726 
1727     /*
1728      * And now dispose of them...
1729      *
1730      * The surrounding if() protection has been added because there might be
1731      * some cases where this routine is called more than once, during
1732      * exceptional events.  This was reported by Marc Lehmann when Storable
1733      * is executed from mod_perl, and the fix was suggested by him.
1734      * 		-- RAM, 20/12/2000
1735      */
1736 
1737 #ifdef USE_PTR_TABLE
1738     if (cxt->pseen) {
1739         struct ptr_tbl *pseen = cxt->pseen;
1740         cxt->pseen = 0;
1741         ptr_table_free(pseen);
1742     }
1743     assert(!cxt->hseen);
1744 #else
1745     if (cxt->hseen) {
1746         HV *hseen = cxt->hseen;
1747         cxt->hseen = 0;
1748         hv_undef(hseen);
1749         sv_free((SV *) hseen);
1750     }
1751 #endif
1752 
1753     if (cxt->hclass) {
1754         HV *hclass = cxt->hclass;
1755         cxt->hclass = 0;
1756         hv_undef(hclass);
1757         sv_free((SV *) hclass);
1758     }
1759 
1760     if (cxt->hook) {
1761         HV *hook = cxt->hook;
1762         cxt->hook = 0;
1763         hv_undef(hook);
1764         sv_free((SV *) hook);
1765     }
1766 
1767     if (cxt->hook_seen) {
1768         AV *hook_seen = cxt->hook_seen;
1769         cxt->hook_seen = 0;
1770         av_undef(hook_seen);
1771         sv_free((SV *) hook_seen);
1772     }
1773 
1774     cxt->forgive_me = -1;	/* Fetched from perl if needed */
1775     cxt->deparse = -1;		/* Idem */
1776     if (cxt->eval) {
1777         SvREFCNT_dec(cxt->eval);
1778     }
1779     cxt->eval = NULL;		/* Idem */
1780     cxt->canonical = -1;	/* Idem */
1781 
1782     reset_context(cxt);
1783 }
1784 
1785 /*
1786  * init_retrieve_context
1787  *
1788  * Initialize a new retrieve context for real recursion.
1789  */
init_retrieve_context(pTHX_ stcxt_t * cxt,int optype,int is_tainted)1790 static void init_retrieve_context(pTHX_
1791 	stcxt_t *cxt, int optype, int is_tainted)
1792 {
1793     INIT_TRACEME;
1794 
1795     TRACEME(("init_retrieve_context"));
1796 
1797     /*
1798      * The hook hash table is used to keep track of the references on
1799      * the STORABLE_thaw hook routines, when found in some class name.
1800      *
1801      * It is assumed that the inheritance tree will not be changed during
1802      * storing, and that no new method will be dynamically created by the
1803      * hooks.
1804      */
1805 
1806     cxt->hook  = newHV();			/* Caches STORABLE_thaw */
1807 
1808 #ifdef USE_PTR_TABLE
1809     cxt->pseen = 0;
1810 #endif
1811 
1812     /*
1813      * If retrieving an old binary version, the cxt->retrieve_vtbl variable
1814      * was set to sv_old_retrieve. We'll need a hash table to keep track of
1815      * the correspondence between the tags and the tag number used by the
1816      * new retrieve routines.
1817      */
1818 
1819     cxt->hseen = (((void*)cxt->retrieve_vtbl == (void*)sv_old_retrieve)
1820                   ? newHV() : 0);
1821 
1822     cxt->aseen = newAV();	/* Where retrieved objects are kept */
1823     cxt->where_is_undef = UNSET_NTAG_T;	/* Special case for PL_sv_undef */
1824     cxt->aclass = newAV();	/* Where seen classnames are kept */
1825     cxt->tagnum = 0;		/* Have to count objects... */
1826     cxt->classnum = 0;		/* ...and class names as well */
1827     cxt->optype = optype;
1828     cxt->s_tainted = is_tainted;
1829     cxt->entry = 1;		/* No recursion yet */
1830 #ifndef HAS_RESTRICTED_HASHES
1831     cxt->derestrict = -1;	/* Fetched from perl if needed */
1832 #endif
1833 #ifndef HAS_UTF8_ALL
1834     cxt->use_bytes = -1;	/* Fetched from perl if needed */
1835 #endif
1836     cxt->accept_future_minor = -1;/* Fetched from perl if needed */
1837     cxt->in_retrieve_overloaded = 0;
1838 
1839     cxt->max_recur_depth = SvIV(get_sv("Storable::recursion_limit", GV_ADD));
1840     cxt->max_recur_depth_hash = SvIV(get_sv("Storable::recursion_limit_hash", GV_ADD));
1841 }
1842 
1843 /*
1844  * clean_retrieve_context
1845  *
1846  * Clean retrieve context by
1847  */
clean_retrieve_context(pTHX_ stcxt_t * cxt)1848 static void clean_retrieve_context(pTHX_ stcxt_t *cxt)
1849 {
1850     TRACEMED(("clean_retrieve_context"));
1851 
1852     ASSERT(cxt->optype & ST_RETRIEVE, ("was performing a retrieve()"));
1853 
1854     if (cxt->aseen) {
1855         AV *aseen = cxt->aseen;
1856         cxt->aseen = 0;
1857         av_undef(aseen);
1858         sv_free((SV *) aseen);
1859     }
1860     cxt->where_is_undef = UNSET_NTAG_T;
1861 
1862     if (cxt->aclass) {
1863         AV *aclass = cxt->aclass;
1864         cxt->aclass = 0;
1865         av_undef(aclass);
1866         sv_free((SV *) aclass);
1867     }
1868 
1869     if (cxt->hook) {
1870         HV *hook = cxt->hook;
1871         cxt->hook = 0;
1872         hv_undef(hook);
1873         sv_free((SV *) hook);
1874     }
1875 
1876     if (cxt->hseen) {
1877         HV *hseen = cxt->hseen;
1878         cxt->hseen = 0;
1879         hv_undef(hseen);
1880         sv_free((SV *) hseen);	/* optional HV, for backward compat. */
1881     }
1882 
1883 #ifndef HAS_RESTRICTED_HASHES
1884     cxt->derestrict = -1;		/* Fetched from perl if needed */
1885 #endif
1886 #ifndef HAS_UTF8_ALL
1887     cxt->use_bytes = -1;		/* Fetched from perl if needed */
1888 #endif
1889     cxt->accept_future_minor = -1;	/* Fetched from perl if needed */
1890 
1891     cxt->in_retrieve_overloaded = 0;
1892     reset_context(cxt);
1893 }
1894 
1895 /*
1896  * clean_context
1897  *
1898  * A workaround for the CROAK bug: cleanup the last context.
1899  */
clean_context(pTHX_ stcxt_t * cxt)1900 static void clean_context(pTHX_ stcxt_t *cxt)
1901 {
1902     TRACEMED(("clean_context"));
1903 
1904     ASSERT(cxt->s_dirty, ("dirty context"));
1905 
1906     if (cxt->membuf_ro)
1907         MBUF_RESTORE();
1908 
1909     ASSERT(!cxt->membuf_ro, ("mbase is not read-only"));
1910 
1911     if (cxt->optype & ST_RETRIEVE)
1912         clean_retrieve_context(aTHX_ cxt);
1913     else if (cxt->optype & ST_STORE)
1914         clean_store_context(aTHX_ cxt);
1915     else
1916         reset_context(cxt);
1917 
1918     ASSERT(!cxt->s_dirty, ("context is clean"));
1919     ASSERT(cxt->entry == 0, ("context is reset"));
1920 }
1921 
1922 /*
1923  * allocate_context
1924  *
1925  * Allocate a new context and push it on top of the parent one.
1926  * This new context is made globally visible via SET_STCXT().
1927  */
allocate_context(pTHX_ stcxt_t * parent_cxt)1928 static stcxt_t *allocate_context(pTHX_ stcxt_t *parent_cxt)
1929 {
1930     stcxt_t *cxt;
1931 
1932     ASSERT(!parent_cxt->s_dirty, ("parent context clean"));
1933 
1934     NEW_STORABLE_CXT_OBJ(cxt);
1935     TRACEMED(("allocate_context"));
1936 
1937     cxt->prev = parent_cxt->my_sv;
1938     SET_STCXT(cxt);
1939 
1940     ASSERT(!cxt->s_dirty, ("clean context"));
1941 
1942     return cxt;
1943 }
1944 
1945 /*
1946  * free_context
1947  *
1948  * Free current context, which cannot be the "root" one.
1949  * Make the context underneath globally visible via SET_STCXT().
1950  */
free_context(pTHX_ stcxt_t * cxt)1951 static void free_context(pTHX_ stcxt_t *cxt)
1952 {
1953     stcxt_t *prev = (stcxt_t *)(cxt->prev ? SvPVX(SvRV(cxt->prev)) : 0);
1954 
1955     TRACEMED(("free_context"));
1956 
1957     ASSERT(!cxt->s_dirty, ("clean context"));
1958     ASSERT(prev, ("not freeing root context"));
1959     assert(prev);
1960 
1961     SvREFCNT_dec(cxt->my_sv);
1962     SET_STCXT(prev);
1963 
1964     ASSERT(cxt, ("context not void"));
1965 }
1966 
1967 /***
1968  *** Predicates.
1969  ***/
1970 
1971 /* these two functions are currently only used within asserts */
1972 #ifdef DASSERT
1973 /*
1974  * is_storing
1975  *
1976  * Tells whether we're in the middle of a store operation.
1977  */
is_storing(pTHX)1978 static int is_storing(pTHX)
1979 {
1980     dSTCXT;
1981 
1982     return cxt->entry && (cxt->optype & ST_STORE);
1983 }
1984 
1985 /*
1986  * is_retrieving
1987  *
1988  * Tells whether we're in the middle of a retrieve operation.
1989  */
is_retrieving(pTHX)1990 static int is_retrieving(pTHX)
1991 {
1992     dSTCXT;
1993 
1994     return cxt->entry && (cxt->optype & ST_RETRIEVE);
1995 }
1996 #endif
1997 
1998 /*
1999  * last_op_in_netorder
2000  *
2001  * Returns whether last operation was made using network order.
2002  *
2003  * This is typically out-of-band information that might prove useful
2004  * to people wishing to convert native to network order data when used.
2005  */
last_op_in_netorder(pTHX)2006 static int last_op_in_netorder(pTHX)
2007 {
2008     dSTCXT;
2009 
2010     assert(cxt);
2011     return cxt->netorder;
2012 }
2013 
2014 /***
2015  *** Hook lookup and calling routines.
2016  ***/
2017 
2018 /*
2019  * pkg_fetchmeth
2020  *
2021  * A wrapper on gv_fetchmethod_autoload() which caches results.
2022  *
2023  * Returns the routine reference as an SV*, or null if neither the package
2024  * nor its ancestors know about the method.
2025  */
pkg_fetchmeth(pTHX_ HV * cache,HV * pkg,const char * method)2026 static SV *pkg_fetchmeth(pTHX_
2027 	HV *cache,
2028 	HV *pkg,
2029 	const char *method)
2030 {
2031     GV *gv;
2032     SV *sv;
2033     const char *hvname = HvNAME_get(pkg);
2034 #ifdef DEBUGME
2035     dSTCXT;
2036 #endif
2037 
2038     /*
2039      * The following code is the same as the one performed by UNIVERSAL::can
2040      * in the Perl core.
2041      */
2042 
2043     gv = gv_fetchmethod_autoload(pkg, method, FALSE);
2044     if (gv && isGV(gv)) {
2045         sv = newRV_inc((SV*) GvCV(gv));
2046         TRACEME(("%s->%s: 0x%" UVxf, hvname, method, PTR2UV(sv)));
2047     } else {
2048         sv = newSVsv(&PL_sv_undef);
2049         TRACEME(("%s->%s: not found", hvname, method));
2050     }
2051 
2052     /*
2053      * Cache the result, ignoring failure: if we can't store the value,
2054      * it just won't be cached.
2055      */
2056 
2057     (void) hv_store(cache, hvname, strlen(hvname), sv, 0);
2058 
2059     return SvOK(sv) ? sv : (SV *) 0;
2060 }
2061 
2062 /*
2063  * pkg_hide
2064  *
2065  * Force cached value to be undef: hook ignored even if present.
2066  */
pkg_hide(pTHX_ HV * cache,HV * pkg,const char * method)2067 static void pkg_hide(pTHX_
2068 	HV *cache,
2069 	HV *pkg,
2070 	const char *method)
2071 {
2072     const char *hvname = HvNAME_get(pkg);
2073     PERL_UNUSED_ARG(method);
2074     (void) hv_store(cache,
2075                     hvname, strlen(hvname), newSVsv(&PL_sv_undef), 0);
2076 }
2077 
2078 /*
2079  * pkg_uncache
2080  *
2081  * Discard cached value: a whole fetch loop will be retried at next lookup.
2082  */
pkg_uncache(pTHX_ HV * cache,HV * pkg,const char * method)2083 static void pkg_uncache(pTHX_
2084 	HV *cache,
2085 	HV *pkg,
2086 	const char *method)
2087 {
2088     const char *hvname = HvNAME_get(pkg);
2089     PERL_UNUSED_ARG(method);
2090     (void) hv_delete(cache, hvname, strlen(hvname), G_DISCARD);
2091 }
2092 
2093 /*
2094  * pkg_can
2095  *
2096  * Our own "UNIVERSAL::can", which caches results.
2097  *
2098  * Returns the routine reference as an SV*, or null if the object does not
2099  * know about the method.
2100  */
pkg_can(pTHX_ HV * cache,HV * pkg,const char * method)2101 static SV *pkg_can(pTHX_
2102 	HV *cache,
2103 	HV *pkg,
2104 	const char *method)
2105 {
2106     SV **svh;
2107     SV *sv;
2108     const char *hvname = HvNAME_get(pkg);
2109 #ifdef DEBUGME
2110     dSTCXT;
2111 #endif
2112 
2113     TRACEME(("pkg_can for %s->%s", hvname, method));
2114 
2115     /*
2116      * Look into the cache to see whether we already have determined
2117      * where the routine was, if any.
2118      *
2119      * NOTA BENE: we don't use 'method' at all in our lookup, since we know
2120      * that only one hook (i.e. always the same) is cached in a given cache.
2121      */
2122 
2123     svh = hv_fetch(cache, hvname, strlen(hvname), FALSE);
2124     if (svh) {
2125         sv = *svh;
2126         if (!SvOK(sv)) {
2127             TRACEME(("cached %s->%s: not found", hvname, method));
2128             return (SV *) 0;
2129         } else {
2130             TRACEME(("cached %s->%s: 0x%" UVxf,
2131                      hvname, method, PTR2UV(sv)));
2132             return sv;
2133         }
2134     }
2135 
2136     TRACEME(("not cached yet"));
2137     return pkg_fetchmeth(aTHX_ cache, pkg, method);	/* Fetch and cache */
2138 }
2139 
2140 /*
2141  * scalar_call
2142  *
2143  * Call routine as obj->hook(av) in scalar context.
2144  * Propagates the single returned value if not called in void context.
2145  */
scalar_call(pTHX_ SV * obj,SV * hook,int cloning,AV * av,I32 flags)2146 static SV *scalar_call(pTHX_
2147 	SV *obj,
2148 	SV *hook,
2149 	int cloning,
2150 	AV *av,
2151 	I32 flags)
2152 {
2153     dSP;
2154     int count;
2155     SV *sv = 0;
2156 #ifdef DEBUGME
2157     dSTCXT;
2158 #endif
2159 
2160     TRACEME(("scalar_call (cloning=%d)", cloning));
2161 
2162     ENTER;
2163     SAVETMPS;
2164 
2165     PUSHMARK(sp);
2166     XPUSHs(obj);
2167     XPUSHs(sv_2mortal(newSViv(cloning)));		/* Cloning flag */
2168     if (av) {
2169         SV **ary = AvARRAY(av);
2170         SSize_t cnt = AvFILLp(av) + 1;
2171         SSize_t i;
2172         XPUSHs(ary[0]);					/* Frozen string */
2173         for (i = 1; i < cnt; i++) {
2174             TRACEME(("pushing arg #%d (0x%" UVxf ")...",
2175                      (int)i, PTR2UV(ary[i])));
2176             XPUSHs(sv_2mortal(newRV_inc(ary[i])));
2177         }
2178     }
2179     PUTBACK;
2180 
2181     TRACEME(("calling..."));
2182     count = call_sv(hook, flags);	/* Go back to Perl code */
2183     TRACEME(("count = %d", count));
2184 
2185     SPAGAIN;
2186 
2187     if (count) {
2188         sv = POPs;
2189         SvREFCNT_inc(sv); /* We're returning it, must stay alive! */
2190     }
2191 
2192     PUTBACK;
2193     FREETMPS;
2194     LEAVE;
2195 
2196     return sv;
2197 }
2198 
2199 /*
2200  * array_call
2201  *
2202  * Call routine obj->hook(cloning) in list context.
2203  * Returns the list of returned values in an array.
2204  */
array_call(pTHX_ SV * obj,SV * hook,int cloning)2205 static AV *array_call(pTHX_
2206 	SV *obj,
2207 	SV *hook,
2208 	int cloning)
2209 {
2210     dSP;
2211     int count;
2212     AV *av;
2213     int i;
2214 #ifdef DEBUGME
2215     dSTCXT;
2216 #endif
2217 
2218     TRACEME(("array_call (cloning=%d)", cloning));
2219 
2220     ENTER;
2221     SAVETMPS;
2222 
2223     PUSHMARK(sp);
2224     XPUSHs(obj);				/* Target object */
2225     XPUSHs(sv_2mortal(newSViv(cloning)));	/* Cloning flag */
2226     PUTBACK;
2227 
2228     count = call_sv(hook, G_ARRAY);	/* Go back to Perl code */
2229 
2230     SPAGAIN;
2231 
2232     av = newAV();
2233     for (i = count - 1; i >= 0; i--) {
2234         SV *sv = POPs;
2235         av_store(av, i, SvREFCNT_inc(sv));
2236     }
2237 
2238     PUTBACK;
2239     FREETMPS;
2240     LEAVE;
2241 
2242     return av;
2243 }
2244 
2245 #if PERL_VERSION_LT(5,15,0)
2246 static void
cleanup_recursive_av(pTHX_ AV * av)2247 cleanup_recursive_av(pTHX_ AV* av) {
2248     SSize_t i = AvFILLp(av);
2249     SV** arr = AvARRAY(av);
2250     if (SvMAGICAL(av)) return;
2251     while (i >= 0) {
2252         if (arr[i]) {
2253 #if PERL_VERSION_LT(5,14,0)
2254             arr[i] = NULL;
2255 #else
2256             SvREFCNT_dec(arr[i]);
2257 #endif
2258         }
2259         i--;
2260     }
2261 }
2262 
2263 #ifndef SvREFCNT_IMMORTAL
2264 #ifdef DEBUGGING
2265    /* exercise the immortal resurrection code in sv_free2() */
2266 #  define SvREFCNT_IMMORTAL 1000
2267 #else
2268 #  define SvREFCNT_IMMORTAL ((~(U32)0)/2)
2269 #endif
2270 #endif
2271 
2272 static void
cleanup_recursive_hv(pTHX_ HV * hv)2273 cleanup_recursive_hv(pTHX_ HV* hv) {
2274     SSize_t i = HvTOTALKEYS(hv);
2275     HE** arr = HvARRAY(hv);
2276     if (SvMAGICAL(hv)) return;
2277     while (i >= 0) {
2278         if (arr[i]) {
2279             SvREFCNT(HeVAL(arr[i])) = SvREFCNT_IMMORTAL;
2280             arr[i] = NULL; /* let it leak. too dangerous to clean it up here */
2281         }
2282         i--;
2283     }
2284 #if PERL_VERSION_LT(5,8,0)
2285     ((XPVHV*)SvANY(hv))->xhv_array = NULL;
2286 #else
2287     HvARRAY(hv) = NULL;
2288 #endif
2289     HvTOTALKEYS(hv) = 0;
2290 }
2291 static void
cleanup_recursive_rv(pTHX_ SV * sv)2292 cleanup_recursive_rv(pTHX_ SV* sv) {
2293     if (sv && SvROK(sv))
2294         SvREFCNT_dec(SvRV(sv));
2295 }
2296 static void
cleanup_recursive_data(pTHX_ SV * sv)2297 cleanup_recursive_data(pTHX_ SV* sv) {
2298     if (SvTYPE(sv) == SVt_PVAV) {
2299         cleanup_recursive_av(aTHX_ (AV*)sv);
2300     }
2301     else if (SvTYPE(sv) == SVt_PVHV) {
2302         cleanup_recursive_hv(aTHX_ (HV*)sv);
2303     }
2304     else {
2305         cleanup_recursive_rv(aTHX_ sv);
2306     }
2307 }
2308 #endif
2309 
2310 /*
2311  * known_class
2312  *
2313  * Lookup the class name in the 'hclass' table and either assign it a new ID
2314  * or return the existing one, by filling in 'classnum'.
2315  *
2316  * Return true if the class was known, false if the ID was just generated.
2317  */
known_class(pTHX_ stcxt_t * cxt,char * name,int len,I32 * classnum)2318 static int known_class(pTHX_
2319 	stcxt_t *cxt,
2320 	char *name,		/* Class name */
2321 	int len,		/* Name length */
2322 	I32 *classnum)
2323 {
2324     SV **svh;
2325     HV *hclass = cxt->hclass;
2326 
2327     TRACEME(("known_class (%s)", name));
2328 
2329     /*
2330      * Recall that we don't store pointers in this hash table, but tags.
2331      * Therefore, we need LOW_32BITS() to extract the relevant parts.
2332      */
2333 
2334     svh = hv_fetch(hclass, name, len, FALSE);
2335     if (svh) {
2336         *classnum = LOW_32BITS(*svh);
2337         return TRUE;
2338     }
2339 
2340     /*
2341      * Unknown classname, we need to record it.
2342      */
2343 
2344     cxt->classnum++;
2345     if (!hv_store(hclass, name, len, INT2PTR(SV*, cxt->classnum), 0))
2346         CROAK(("Unable to record new classname"));
2347 
2348     *classnum = cxt->classnum;
2349     return FALSE;
2350 }
2351 
2352 /***
2353  *** Specific store routines.
2354  ***/
2355 
2356 /*
2357  * store_ref
2358  *
2359  * Store a reference.
2360  * Layout is SX_REF <object> or SX_OVERLOAD <object>.
2361  */
store_ref(pTHX_ stcxt_t * cxt,SV * sv)2362 static int store_ref(pTHX_ stcxt_t *cxt, SV *sv)
2363 {
2364     int retval;
2365     int is_weak = 0;
2366     TRACEME(("store_ref (0x%" UVxf ")", PTR2UV(sv)));
2367 
2368     /*
2369      * Follow reference, and check if target is overloaded.
2370      */
2371 
2372 #ifdef SvWEAKREF
2373     if (SvWEAKREF(sv))
2374         is_weak = 1;
2375     TRACEME(("ref (0x%" UVxf ") is%s weak", PTR2UV(sv),
2376              is_weak ? "" : "n't"));
2377 #endif
2378     sv = SvRV(sv);
2379 
2380     if (SvOBJECT(sv)) {
2381         HV *stash = (HV *) SvSTASH(sv);
2382         if (stash && Gv_AMG(stash)) {
2383             TRACEME(("ref (0x%" UVxf ") is overloaded", PTR2UV(sv)));
2384             PUTMARK(is_weak ? SX_WEAKOVERLOAD : SX_OVERLOAD);
2385         } else
2386             PUTMARK(is_weak ? SX_WEAKREF : SX_REF);
2387     } else
2388         PUTMARK(is_weak ? SX_WEAKREF : SX_REF);
2389 
2390     cxt->recur_sv = sv;
2391 
2392     TRACEME((">ref recur_depth %" IVdf ", recur_sv (0x%" UVxf ") max %" IVdf, cxt->recur_depth,
2393              PTR2UV(cxt->recur_sv), cxt->max_recur_depth));
2394     if (RECURSION_TOO_DEEP()) {
2395 #if PERL_VERSION_LT(5,15,0)
2396         cleanup_recursive_data(aTHX_ (SV*)sv);
2397 #endif
2398         CROAK((MAX_DEPTH_ERROR));
2399     }
2400 
2401     retval = store(aTHX_ cxt, sv);
2402     if (cxt->max_recur_depth != -1 && cxt->recur_depth > 0) {
2403         TRACEME(("<ref recur_depth --%" IVdf, cxt->recur_depth));
2404         --cxt->recur_depth;
2405     }
2406     return retval;
2407 }
2408 
2409 /*
2410  * store_scalar
2411  *
2412  * Store a scalar.
2413  *
2414  * Layout is SX_LSCALAR <length> <data>, SX_SCALAR <length> <data> or SX_UNDEF.
2415  * SX_LUTF8STR and SX_UTF8STR are used for UTF-8 strings.
2416  * The <data> section is omitted if <length> is 0.
2417  *
2418  * For vstrings, the vstring portion is stored first with
2419  * SX_LVSTRING <length> <data> or SX_VSTRING <length> <data>, followed by
2420  * SX_(L)SCALAR or SX_(L)UTF8STR with the actual PV.
2421  *
2422  * If integer or double, the layout is SX_INTEGER <data> or SX_DOUBLE <data>.
2423  * Small integers (within [-127, +127]) are stored as SX_BYTE <byte>.
2424  *
2425  * For huge strings use SX_LOBJECT SX_type SX_U64 <type> <data>
2426  */
store_scalar(pTHX_ stcxt_t * cxt,SV * sv)2427 static int store_scalar(pTHX_ stcxt_t *cxt, SV *sv)
2428 {
2429     IV iv;
2430     char *pv;
2431     STRLEN len;
2432     U32 flags = SvFLAGS(sv);	/* "cc -O" may put it in register */
2433 
2434     TRACEME(("store_scalar (0x%" UVxf ")", PTR2UV(sv)));
2435 
2436     /*
2437      * For efficiency, break the SV encapsulation by peaking at the flags
2438      * directly without using the Perl macros to avoid dereferencing
2439      * sv->sv_flags each time we wish to check the flags.
2440      */
2441 
2442     if (!(flags & SVf_OK)) {			/* !SvOK(sv) */
2443         if (sv == &PL_sv_undef) {
2444             TRACEME(("immortal undef"));
2445             PUTMARK(SX_SV_UNDEF);
2446         } else {
2447             TRACEME(("undef at 0x%" UVxf, PTR2UV(sv)));
2448             PUTMARK(SX_UNDEF);
2449         }
2450         return 0;
2451     }
2452 
2453     /*
2454      * Always store the string representation of a scalar if it exists.
2455      * Gisle Aas provided me with this test case, better than a long speach:
2456      *
2457      *  perl -MDevel::Peek -le '$a="abc"; $a+0; Dump($a)'
2458      *  SV = PVNV(0x80c8520)
2459      *       REFCNT = 1
2460      *       FLAGS = (NOK,POK,pNOK,pPOK)
2461      *       IV = 0
2462      *       NV = 0
2463      *       PV = 0x80c83d0 "abc"\0
2464      *       CUR = 3
2465      *       LEN = 4
2466      *
2467      * Write SX_SCALAR, length, followed by the actual data.
2468      *
2469      * Otherwise, write an SX_BYTE, SX_INTEGER or an SX_DOUBLE as
2470      * appropriate, followed by the actual (binary) data. A double
2471      * is written as a string if network order, for portability.
2472      *
2473      * NOTE: instead of using SvNOK(sv), we test for SvNOKp(sv).
2474      * The reason is that when the scalar value is tainted, the SvNOK(sv)
2475      * value is false.
2476      *
2477      * The test for a read-only scalar with both POK and NOK set is meant
2478      * to quickly detect &PL_sv_yes and &PL_sv_no without having to pay the
2479      * address comparison for each scalar we store.
2480      */
2481 
2482 #define SV_MAYBE_IMMORTAL (SVf_READONLY|SVf_POK|SVf_NOK)
2483 
2484     if ((flags & SV_MAYBE_IMMORTAL) == SV_MAYBE_IMMORTAL) {
2485         if (sv == &PL_sv_yes) {
2486             TRACEME(("immortal yes"));
2487             PUTMARK(SX_SV_YES);
2488         } else if (sv == &PL_sv_no) {
2489             TRACEME(("immortal no"));
2490             PUTMARK(SX_SV_NO);
2491         } else {
2492             pv = SvPV(sv, len);		/* We know it's SvPOK */
2493             goto string;			/* Share code below */
2494         }
2495     } else if (flags & SVf_POK) {
2496         /* public string - go direct to string read.  */
2497         goto string_readlen;
2498     } else if (
2499 #if PERL_VERSION_LT(5,7,0)
2500                /* For 5.6 and earlier NV flag trumps IV flag, so only use integer
2501                   direct if NV flag is off.  */
2502                (flags & (SVf_NOK | SVf_IOK)) == SVf_IOK
2503 #else
2504                /* 5.7 rules are that if IV public flag is set, IV value is as
2505                   good, if not better, than NV value.  */
2506                flags & SVf_IOK
2507 #endif
2508                ) {
2509         iv = SvIV(sv);
2510         /*
2511          * Will come here from below with iv set if double is an integer.
2512          */
2513     integer:
2514 
2515         /* Sorry. This isn't in 5.005_56 (IIRC) or earlier.  */
2516 #ifdef SVf_IVisUV
2517         /* Need to do this out here, else 0xFFFFFFFF becomes iv of -1
2518          * (for example) and that ends up in the optimised small integer
2519          * case.
2520          */
2521         if ((flags & SVf_IVisUV) && SvUV(sv) > IV_MAX) {
2522             TRACEME(("large unsigned integer as string, value = %" UVuf,
2523                      SvUV(sv)));
2524             goto string_readlen;
2525         }
2526 #endif
2527         /*
2528          * Optimize small integers into a single byte, otherwise store as
2529          * a real integer (converted into network order if they asked).
2530          */
2531 
2532         if (iv >= -128 && iv <= 127) {
2533             unsigned char siv = (unsigned char) (iv + 128); /* [0,255] */
2534             PUTMARK(SX_BYTE);
2535             PUTMARK(siv);
2536             TRACEME(("small integer stored as %d", (int)siv));
2537         } else if (cxt->netorder) {
2538 #ifndef HAS_HTONL
2539             TRACEME(("no htonl, fall back to string for integer"));
2540             goto string_readlen;
2541 #else
2542             I32 niv;
2543 
2544 
2545 #if IVSIZE > 4
2546             if (
2547 #ifdef SVf_IVisUV
2548                 /* Sorry. This isn't in 5.005_56 (IIRC) or earlier.  */
2549                 ((flags & SVf_IVisUV) && SvUV(sv) > (UV)0x7FFFFFFF) ||
2550 #endif
2551                 (iv > (IV)0x7FFFFFFF) || (iv < -(IV)0x80000000)) {
2552                 /* Bigger than 32 bits.  */
2553                 TRACEME(("large network order integer as string, value = %" IVdf, iv));
2554                 goto string_readlen;
2555             }
2556 #endif
2557 
2558             niv = (I32) htonl((I32) iv);
2559             TRACEME(("using network order"));
2560             PUTMARK(SX_NETINT);
2561             WRITE_I32(niv);
2562 #endif
2563         } else {
2564             PUTMARK(SX_INTEGER);
2565             WRITE(&iv, sizeof(iv));
2566         }
2567 
2568         TRACEME(("ok (integer 0x%" UVxf ", value = %" IVdf ")", PTR2UV(sv), iv));
2569     } else if (flags & SVf_NOK) {
2570         NV_bytes nv;
2571 #ifdef NV_CLEAR
2572         /* if we can't tell if there's padding, clear the whole NV and hope the
2573            compiler leaves the padding alone
2574         */
2575         Zero(&nv, 1, NV_bytes);
2576 #endif
2577 #if PERL_VERSION_LT(5,7,0)
2578         nv.nv = SvNV(sv);
2579         /*
2580          * Watch for number being an integer in disguise.
2581          */
2582         if (nv.nv == (NV) (iv = I_V(nv.nv))) {
2583             TRACEME(("double %" NVff " is actually integer %" IVdf, nv, iv));
2584             goto integer;		/* Share code above */
2585         }
2586 #else
2587 
2588         SvIV_please(sv);
2589         if (SvIOK_notUV(sv)) {
2590             iv = SvIV(sv);
2591             goto integer;		/* Share code above */
2592         }
2593         nv.nv = SvNV(sv);
2594 #endif
2595 
2596         if (cxt->netorder) {
2597             TRACEME(("double %" NVff " stored as string", nv.nv));
2598             goto string_readlen;		/* Share code below */
2599         }
2600 #if NV_PADDING
2601         Zero(nv.bytes + NVSIZE - NV_PADDING, NV_PADDING, char);
2602 #endif
2603 
2604         PUTMARK(SX_DOUBLE);
2605         WRITE(&nv, sizeof(nv));
2606 
2607         TRACEME(("ok (double 0x%" UVxf ", value = %" NVff ")", PTR2UV(sv), nv.nv));
2608 
2609     } else if (flags & (SVp_POK | SVp_NOK | SVp_IOK)) {
2610 #ifdef SvVOK
2611         MAGIC *mg;
2612 #endif
2613         UV wlen; /* For 64-bit machines */
2614 
2615     string_readlen:
2616         pv = SvPV(sv, len);
2617 
2618         /*
2619          * Will come here from above  if it was readonly, POK and NOK but
2620          * neither &PL_sv_yes nor &PL_sv_no.
2621          */
2622     string:
2623 
2624 #ifdef SvVOK
2625         if (SvMAGICAL(sv) && (mg = mg_find(sv, 'V'))) {
2626             /* The macro passes this by address, not value, and a lot of
2627                called code assumes that it's 32 bits without checking.  */
2628             const SSize_t len = mg->mg_len;
2629             /* we no longer accept vstrings over I32_SIZE-1, so don't emit
2630                them, also, older Storables handle them badly.
2631             */
2632             if (len >= I32_MAX) {
2633                 CROAK(("vstring too large to freeze"));
2634             }
2635             STORE_PV_LEN((const char *)mg->mg_ptr,
2636                          len, SX_VSTRING, SX_LVSTRING);
2637         }
2638 #endif
2639 
2640         wlen = (Size_t)len;
2641         if (SvUTF8 (sv))
2642             STORE_UTF8STR(pv, wlen);
2643         else
2644             STORE_SCALAR(pv, wlen);
2645         TRACEME(("ok (scalar 0x%" UVxf " '%s', length = %" UVuf ")",
2646                  PTR2UV(sv), len >= 2048 ? "<string too long>" : SvPVX(sv),
2647                  (UV)len));
2648     } else {
2649         CROAK(("Can't determine type of %s(0x%" UVxf ")",
2650                sv_reftype(sv, FALSE),
2651                PTR2UV(sv)));
2652     }
2653     return 0;		/* Ok, no recursion on scalars */
2654 }
2655 
2656 /*
2657  * store_array
2658  *
2659  * Store an array.
2660  *
2661  * Layout is SX_ARRAY <size> followed by each item, in increasing index order.
2662  * Each item is stored as <object>.
2663  */
store_array(pTHX_ stcxt_t * cxt,AV * av)2664 static int store_array(pTHX_ stcxt_t *cxt, AV *av)
2665 {
2666     SV **sav;
2667     UV len = av_len(av) + 1;
2668     UV i;
2669     int ret;
2670     SV *const recur_sv = cxt->recur_sv;
2671 
2672     TRACEME(("store_array (0x%" UVxf ")", PTR2UV(av)));
2673 
2674 #ifdef HAS_U64
2675     if (len > 0x7fffffffu) {
2676         /*
2677          * Large array by emitting SX_LOBJECT 1 U64 data
2678          */
2679         PUTMARK(SX_LOBJECT);
2680         PUTMARK(SX_ARRAY);
2681         W64LEN(len);
2682         TRACEME(("lobject size = %lu", (unsigned long)len));
2683     } else
2684 #endif
2685     {
2686         /*
2687          * Normal array by emitting SX_ARRAY, followed by the array length.
2688          */
2689         I32 l = (I32)len;
2690         PUTMARK(SX_ARRAY);
2691         WLEN(l);
2692         TRACEME(("size = %d", (int)l));
2693     }
2694 
2695     TRACEME((">array recur_depth %" IVdf ", recur_sv (0x%" UVxf ") max %" IVdf, cxt->recur_depth,
2696              PTR2UV(cxt->recur_sv), cxt->max_recur_depth));
2697     if (recur_sv != (SV*)av) {
2698         if (RECURSION_TOO_DEEP()) {
2699             /* with <= 5.14 it recurses in the cleanup also, needing 2x stack size */
2700 #if PERL_VERSION_LT(5,15,0)
2701             cleanup_recursive_data(aTHX_ (SV*)av);
2702 #endif
2703             CROAK((MAX_DEPTH_ERROR));
2704         }
2705     }
2706 
2707     /*
2708      * Now store each item recursively.
2709      */
2710 
2711     for (i = 0; i < len; i++) {
2712         sav = av_fetch(av, i, 0);
2713         if (!sav) {
2714             TRACEME(("(#%d) nonexistent item", (int)i));
2715             STORE_SV_UNDEF();
2716             continue;
2717         }
2718 #if PERL_VERSION_GE(5,19,0)
2719         /* In 5.19.3 and up, &PL_sv_undef can actually be stored in
2720          * an array; it no longer represents nonexistent elements.
2721          * Historically, we have used SX_SV_UNDEF in arrays for
2722          * nonexistent elements, so we use SX_SVUNDEF_ELEM for
2723          * &PL_sv_undef itself. */
2724         if (*sav == &PL_sv_undef) {
2725             TRACEME(("(#%d) undef item", (int)i));
2726             cxt->tagnum++;
2727             PUTMARK(SX_SVUNDEF_ELEM);
2728             continue;
2729         }
2730 #endif
2731         TRACEME(("(#%d) item", (int)i));
2732         if ((ret = store(aTHX_ cxt, *sav)))	/* Extra () for -Wall */
2733             return ret;
2734     }
2735 
2736     if (recur_sv != (SV*)av) {
2737         assert(cxt->max_recur_depth == -1 || cxt->recur_depth > 0);
2738         if (cxt->max_recur_depth != -1 && cxt->recur_depth > 0) {
2739             TRACEME(("<array recur_depth --%" IVdf, cxt->recur_depth));
2740             --cxt->recur_depth;
2741         }
2742     }
2743     TRACEME(("ok (array)"));
2744 
2745     return 0;
2746 }
2747 
2748 
2749 #if PERL_VERSION_LT(5,7,0)
2750 
2751 /*
2752  * sortcmp
2753  *
2754  * Sort two SVs
2755  * Borrowed from perl source file pp_ctl.c, where it is used by pp_sort.
2756  */
2757 static int
sortcmp(const void * a,const void * b)2758 sortcmp(const void *a, const void *b)
2759 {
2760 #if defined(USE_ITHREADS)
2761     dTHX;
2762 #endif /* USE_ITHREADS */
2763     return sv_cmp(*(SV * const *) a, *(SV * const *) b);
2764 }
2765 
2766 #endif /* PERL_VERSION_LT(5,7,0) */
2767 
2768 /*
2769  * store_hash
2770  *
2771  * Store a hash table.
2772  *
2773  * For a "normal" hash (not restricted, no utf8 keys):
2774  *
2775  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
2776  * Values are stored as <object>.
2777  * Keys are stored as <length> <data>, the <data> section being omitted
2778  * if length is 0.
2779  *
2780  * For a "fancy" hash (restricted or utf8 keys):
2781  *
2782  * Layout is SX_FLAG_HASH <size> <hash flags> followed by each key/value pair,
2783  * in random order.
2784  * Values are stored as <object>.
2785  * Keys are stored as <flags> <length> <data>, the <data> section being omitted
2786  * if length is 0.
2787  * Currently the only hash flag is "restricted"
2788  * Key flags are as for hv.h
2789  */
store_hash(pTHX_ stcxt_t * cxt,HV * hv)2790 static int store_hash(pTHX_ stcxt_t *cxt, HV *hv)
2791 {
2792     dVAR;
2793     UV len = (UV)HvTOTALKEYS(hv);
2794     Size_t i;
2795     int ret = 0;
2796     I32 riter;
2797     HE *eiter;
2798     int flagged_hash = ((SvREADONLY(hv)
2799 #ifdef HAS_HASH_KEY_FLAGS
2800                          || HvHASKFLAGS(hv)
2801 #endif
2802                          ) ? 1 : 0);
2803     unsigned char hash_flags = (SvREADONLY(hv) ? SHV_RESTRICTED : 0);
2804     SV * const recur_sv = cxt->recur_sv;
2805 
2806     /*
2807      * Signal hash by emitting SX_HASH, followed by the table length.
2808      * Max number of keys per perl version:
2809      *    IV            - 5.12
2810      *    STRLEN  5.14  - 5.24   (size_t: U32/U64)
2811      *    SSize_t 5.22c - 5.24c  (I32/I64)
2812      *    U32     5.25c -
2813      */
2814 
2815     if (len > 0x7fffffffu) { /* keys > I32_MAX */
2816         /*
2817          * Large hash: SX_LOBJECT type hashflags? U64 data
2818          *
2819          * Stupid limitation:
2820          * Note that perl5 can store more than 2G keys, but only iterate
2821          * over 2G max. (cperl can)
2822          * We need to manually iterate over it then, unsorted.
2823          * But until perl itself cannot do that, skip that.
2824          */
2825         TRACEME(("lobject size = %lu", (unsigned long)len));
2826 #ifdef HAS_U64
2827         PUTMARK(SX_LOBJECT);
2828         if (flagged_hash) {
2829             PUTMARK(SX_FLAG_HASH);
2830             PUTMARK(hash_flags);
2831         } else {
2832             PUTMARK(SX_HASH);
2833         }
2834         W64LEN(len);
2835         return store_lhash(aTHX_ cxt, hv, hash_flags);
2836 #else
2837         /* <5.12 you could store larger hashes, but cannot iterate over them.
2838            So we reject them, it's a bug. */
2839         CROAK(("Cannot store large objects on a 32bit system"));
2840 #endif
2841     } else {
2842         I32 l = (I32)len;
2843         if (flagged_hash) {
2844             TRACEME(("store_hash (0x%" UVxf ") (flags %x)", PTR2UV(hv),
2845                      (unsigned int)hash_flags));
2846             PUTMARK(SX_FLAG_HASH);
2847             PUTMARK(hash_flags);
2848         } else {
2849             TRACEME(("store_hash (0x%" UVxf ")", PTR2UV(hv)));
2850             PUTMARK(SX_HASH);
2851         }
2852         WLEN(l);
2853         TRACEME(("size = %d, used = %d", (int)l, (int)HvUSEDKEYS(hv)));
2854     }
2855 
2856     TRACEME((">hash recur_depth %" IVdf ", recur_sv (0x%" UVxf ") max %" IVdf, cxt->recur_depth,
2857              PTR2UV(cxt->recur_sv), cxt->max_recur_depth_hash));
2858     if (recur_sv != (SV*)hv && cxt->max_recur_depth_hash != -1) {
2859         ++cxt->recur_depth;
2860     }
2861     if (RECURSION_TOO_DEEP_HASH()) {
2862 #if PERL_VERSION_LT(5,15,0)
2863         cleanup_recursive_data(aTHX_ (SV*)hv);
2864 #endif
2865         CROAK((MAX_DEPTH_ERROR));
2866     }
2867 
2868     /*
2869      * Save possible iteration state via each() on that table.
2870      *
2871      * Note that perl as of 5.24 *can* store more than 2G keys, but *not*
2872      * iterate over it.
2873      * Lengths of hash keys are also limited to I32, which is good.
2874      */
2875 
2876     riter = HvRITER_get(hv);
2877     eiter = HvEITER_get(hv);
2878     hv_iterinit(hv);
2879 
2880     /*
2881      * Now store each item recursively.
2882      *
2883      * If canonical is defined to some true value then store each
2884      * key/value pair in sorted order otherwise the order is random.
2885      * Canonical order is irrelevant when a deep clone operation is performed.
2886      *
2887      * Fetch the value from perl only once per store() operation, and only
2888      * when needed.
2889      */
2890 
2891     if (
2892         !(cxt->optype & ST_CLONE)
2893         && (cxt->canonical == 1
2894             || (cxt->canonical < 0
2895                 && (cxt->canonical =
2896                     (SvTRUE(get_sv("Storable::canonical", GV_ADD))
2897                      ? 1 : 0))))
2898 	) {
2899         /*
2900          * Storing in order, sorted by key.
2901          * Run through the hash, building up an array of keys in a
2902          * mortal array, sort the array and then run through the
2903          * array.
2904          */
2905         AV *av = newAV();
2906         av_extend (av, len);
2907 
2908         TRACEME(("using canonical order"));
2909 
2910         for (i = 0; i < len; i++) {
2911 #ifdef HAS_RESTRICTED_HASHES
2912             HE *he = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS);
2913 #else
2914             HE *he = hv_iternext(hv);
2915 #endif
2916             av_store(av, i, hv_iterkeysv(he));
2917         }
2918 
2919         STORE_HASH_SORT;
2920 
2921         for (i = 0; i < len; i++) {
2922 #ifdef HAS_RESTRICTED_HASHES
2923             int placeholders = (int)HvPLACEHOLDERS_get(hv);
2924 #endif
2925             unsigned char flags = 0;
2926             char *keyval;
2927             STRLEN keylen_tmp;
2928             I32 keylen;
2929             SV *key = av_shift(av);
2930             /* This will fail if key is a placeholder.
2931                Track how many placeholders we have, and error if we
2932                "see" too many.  */
2933             HE *he  = hv_fetch_ent(hv, key, 0, 0);
2934             SV *val;
2935 
2936             if (he) {
2937                 if (!(val =  HeVAL(he))) {
2938                     /* Internal error, not I/O error */
2939                     return 1;
2940                 }
2941             } else {
2942 #ifdef HAS_RESTRICTED_HASHES
2943                 /* Should be a placeholder.  */
2944                 if (placeholders-- < 0) {
2945                     /* This should not happen - number of
2946                        retrieves should be identical to
2947                        number of placeholders.  */
2948                     return 1;
2949                 }
2950                 /* Value is never needed, and PL_sv_undef is
2951                    more space efficient to store.  */
2952                 val = &PL_sv_undef;
2953                 ASSERT (flags == 0,
2954                         ("Flags not 0 but %d", (int)flags));
2955                 flags = SHV_K_PLACEHOLDER;
2956 #else
2957                 return 1;
2958 #endif
2959             }
2960 
2961             /*
2962              * Store value first.
2963              */
2964 
2965             TRACEME(("(#%d) value 0x%" UVxf, (int)i, PTR2UV(val)));
2966 
2967             if ((ret = store(aTHX_ cxt, val)))	/* Extra () for -Wall, grr... */
2968                 goto out;
2969 
2970             /*
2971              * Write key string.
2972              * Keys are written after values to make sure retrieval
2973              * can be optimal in terms of memory usage, where keys are
2974              * read into a fixed unique buffer called kbuf.
2975              * See retrieve_hash() for details.
2976              */
2977 
2978             /* Implementation of restricted hashes isn't nicely
2979                abstracted:  */
2980             if ((hash_flags & SHV_RESTRICTED)
2981                 && SvTRULYREADONLY(val)) {
2982                 flags |= SHV_K_LOCKED;
2983             }
2984 
2985             keyval = SvPV(key, keylen_tmp);
2986             keylen = keylen_tmp;
2987 #ifdef HAS_UTF8_HASHES
2988             /* If you build without optimisation on pre 5.6
2989                then nothing spots that SvUTF8(key) is always 0,
2990                so the block isn't optimised away, at which point
2991                the linker dislikes the reference to
2992                bytes_from_utf8.  */
2993             if (SvUTF8(key)) {
2994                 const char *keysave = keyval;
2995                 bool is_utf8 = TRUE;
2996 
2997                 /* Just casting the &klen to (STRLEN) won't work
2998                    well if STRLEN and I32 are of different widths.
2999                    --jhi */
3000                 keyval = (char*)bytes_from_utf8((U8*)keyval,
3001                                                 &keylen_tmp,
3002                                                 &is_utf8);
3003 
3004                 /* If we were able to downgrade here, then than
3005                    means that we have  a key which only had chars
3006                    0-255, but was utf8 encoded.  */
3007 
3008                 if (keyval != keysave) {
3009                     keylen = keylen_tmp;
3010                     flags |= SHV_K_WASUTF8;
3011                 } else {
3012                     /* keylen_tmp can't have changed, so no need
3013                        to assign back to keylen.  */
3014                     flags |= SHV_K_UTF8;
3015                 }
3016             }
3017 #endif
3018 
3019             if (flagged_hash) {
3020                 PUTMARK(flags);
3021                 TRACEME(("(#%d) key '%s' flags %x %u", (int)i, keyval, flags, *keyval));
3022             } else {
3023                 /* This is a workaround for a bug in 5.8.0
3024                    that causes the HEK_WASUTF8 flag to be
3025                    set on an HEK without the hash being
3026                    marked as having key flags. We just
3027                    cross our fingers and drop the flag.
3028                    AMS 20030901 */
3029                 assert (flags == 0 || flags == SHV_K_WASUTF8);
3030                 TRACEME(("(#%d) key '%s'", (int)i, keyval));
3031             }
3032             WLEN(keylen);
3033             if (keylen)
3034                 WRITE(keyval, keylen);
3035             if (flags & SHV_K_WASUTF8)
3036                 Safefree (keyval);
3037         }
3038 
3039         /*
3040          * Free up the temporary array
3041          */
3042 
3043         av_undef(av);
3044         sv_free((SV *) av);
3045 
3046     } else {
3047 
3048         /*
3049          * Storing in "random" order (in the order the keys are stored
3050          * within the hash).  This is the default and will be faster!
3051          */
3052 
3053         for (i = 0; i < len; i++) {
3054 #ifdef HV_ITERNEXT_WANTPLACEHOLDERS
3055             HE *he = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS);
3056 #else
3057             HE *he = hv_iternext(hv);
3058 #endif
3059             SV *val = (he ? hv_iterval(hv, he) : 0);
3060 
3061             if (val == 0)
3062                 return 1;		/* Internal error, not I/O error */
3063 
3064             if ((ret = store_hentry(aTHX_ cxt, hv, i, he, hash_flags)))
3065                 goto out;
3066 #if 0
3067             /* Implementation of restricted hashes isn't nicely
3068                abstracted:  */
3069             flags = (((hash_flags & SHV_RESTRICTED)
3070                       && SvTRULYREADONLY(val))
3071                      ? SHV_K_LOCKED : 0);
3072 
3073             if (val == &PL_sv_placeholder) {
3074                 flags |= SHV_K_PLACEHOLDER;
3075                 val = &PL_sv_undef;
3076             }
3077 
3078             /*
3079              * Store value first.
3080              */
3081 
3082             TRACEME(("(#%d) value 0x%" UVxf, (int)i, PTR2UV(val)));
3083 
3084             if ((ret = store(aTHX_ cxt, val)))	/* Extra () for -Wall */
3085                 goto out;
3086 
3087 
3088             hek = HeKEY_hek(he);
3089             len = HEK_LEN(hek);
3090             if (len == HEf_SVKEY) {
3091                 /* This is somewhat sick, but the internal APIs are
3092                  * such that XS code could put one of these in
3093                  * a regular hash.
3094                  * Maybe we should be capable of storing one if
3095                  * found.
3096                  */
3097                 key_sv = HeKEY_sv(he);
3098                 flags |= SHV_K_ISSV;
3099             } else {
3100                 /* Regular string key. */
3101 #ifdef HAS_HASH_KEY_FLAGS
3102                 if (HEK_UTF8(hek))
3103                     flags |= SHV_K_UTF8;
3104                 if (HEK_WASUTF8(hek))
3105                     flags |= SHV_K_WASUTF8;
3106 #endif
3107                 key = HEK_KEY(hek);
3108             }
3109             /*
3110              * Write key string.
3111              * Keys are written after values to make sure retrieval
3112              * can be optimal in terms of memory usage, where keys are
3113              * read into a fixed unique buffer called kbuf.
3114              * See retrieve_hash() for details.
3115              */
3116 
3117             if (flagged_hash) {
3118                 PUTMARK(flags);
3119                 TRACEME(("(#%d) key '%s' flags %x", (int)i, key, flags));
3120             } else {
3121                 /* This is a workaround for a bug in 5.8.0
3122                    that causes the HEK_WASUTF8 flag to be
3123                    set on an HEK without the hash being
3124                    marked as having key flags. We just
3125                    cross our fingers and drop the flag.
3126                    AMS 20030901 */
3127                 assert (flags == 0 || flags == SHV_K_WASUTF8);
3128                 TRACEME(("(#%d) key '%s'", (int)i, key));
3129             }
3130             if (flags & SHV_K_ISSV) {
3131                 int ret;
3132                 if ((ret = store(aTHX_ cxt, key_sv)))
3133                     goto out;
3134             } else {
3135                 WLEN(len);
3136                 if (len)
3137                     WRITE(key, len);
3138             }
3139 #endif
3140         }
3141     }
3142 
3143     TRACEME(("ok (hash 0x%" UVxf ")", PTR2UV(hv)));
3144 
3145  out:
3146     assert(cxt->max_recur_depth_hash != -1 && cxt->recur_depth > 0);
3147     TRACEME(("<hash recur_depth --%" IVdf , cxt->recur_depth));
3148     if (cxt->max_recur_depth_hash != -1 && recur_sv != (SV*)hv && cxt->recur_depth > 0) {
3149         --cxt->recur_depth;
3150     }
3151     HvRITER_set(hv, riter);		/* Restore hash iterator state */
3152     HvEITER_set(hv, eiter);
3153 
3154     return ret;
3155 }
3156 
store_hentry(pTHX_ stcxt_t * cxt,HV * hv,UV i,HE * he,unsigned char hash_flags)3157 static int store_hentry(pTHX_
3158 	stcxt_t *cxt, HV* hv, UV i, HE *he, unsigned char hash_flags)
3159 {
3160     int ret = 0;
3161     SV* val = hv_iterval(hv, he);
3162     int flagged_hash = ((SvREADONLY(hv)
3163 #ifdef HAS_HASH_KEY_FLAGS
3164                          || HvHASKFLAGS(hv)
3165 #endif
3166                          ) ? 1 : 0);
3167     unsigned char flags = (((hash_flags & SHV_RESTRICTED)
3168                             && SvTRULYREADONLY(val))
3169                            ? SHV_K_LOCKED : 0);
3170 #ifndef DEBUGME
3171     PERL_UNUSED_ARG(i);
3172 #endif
3173     if (val == &PL_sv_placeholder) {
3174         flags |= SHV_K_PLACEHOLDER;
3175         val = &PL_sv_undef;
3176     }
3177 
3178     /*
3179      * Store value first.
3180      */
3181 
3182     TRACEME(("(#%d) value 0x%" UVxf, (int)i, PTR2UV(val)));
3183 
3184     {
3185         HEK* hek = HeKEY_hek(he);
3186         I32  len = HEK_LEN(hek);
3187         SV *key_sv = NULL;
3188         char *key = 0;
3189 
3190         if ((ret = store(aTHX_ cxt, val)))
3191             return ret;
3192         if (len == HEf_SVKEY) {
3193             key_sv = HeKEY_sv(he);
3194             flags |= SHV_K_ISSV;
3195         } else {
3196             /* Regular string key. */
3197 #ifdef HAS_HASH_KEY_FLAGS
3198             if (HEK_UTF8(hek))
3199                 flags |= SHV_K_UTF8;
3200             if (HEK_WASUTF8(hek))
3201                 flags |= SHV_K_WASUTF8;
3202 #endif
3203             key = HEK_KEY(hek);
3204         }
3205         /*
3206          * Write key string.
3207          * Keys are written after values to make sure retrieval
3208          * can be optimal in terms of memory usage, where keys are
3209          * read into a fixed unique buffer called kbuf.
3210          * See retrieve_hash() for details.
3211          */
3212 
3213         if (flagged_hash) {
3214             PUTMARK(flags);
3215             TRACEME(("(#%d) key '%s' flags %x", (int)i, key, flags));
3216         } else {
3217             /* This is a workaround for a bug in 5.8.0
3218                that causes the HEK_WASUTF8 flag to be
3219                set on an HEK without the hash being
3220                marked as having key flags. We just
3221                cross our fingers and drop the flag.
3222                AMS 20030901 */
3223             assert (flags == 0 || flags == SHV_K_WASUTF8);
3224             TRACEME(("(#%d) key '%s'", (int)i, key));
3225         }
3226         if (flags & SHV_K_ISSV) {
3227             if ((ret = store(aTHX_ cxt, key_sv)))
3228                 return ret;
3229         } else {
3230             WLEN(len);
3231             if (len)
3232                 WRITE(key, len);
3233         }
3234     }
3235     return ret;
3236 }
3237 
3238 
3239 #ifdef HAS_U64
3240 /*
3241  * store_lhash
3242  *
3243  * Store a overlong hash table, with >2G keys, which we cannot iterate
3244  * over with perl5. xhv_eiter is only I32 there. (only cperl can)
3245  * and we also do not want to sort it.
3246  * So we walk the buckets and chains manually.
3247  *
3248  * type, len and flags are already written.
3249  */
3250 
store_lhash(pTHX_ stcxt_t * cxt,HV * hv,unsigned char hash_flags)3251 static int store_lhash(pTHX_ stcxt_t *cxt, HV *hv, unsigned char hash_flags)
3252 {
3253     dVAR;
3254     int ret = 0;
3255     Size_t i;
3256     UV ix = 0;
3257     HE** array;
3258 #ifdef DEBUGME
3259     UV len = (UV)HvTOTALKEYS(hv);
3260 #endif
3261     SV * const recur_sv = cxt->recur_sv;
3262     if (hash_flags) {
3263         TRACEME(("store_lhash (0x%" UVxf ") (flags %x)", PTR2UV(hv),
3264                  (int) hash_flags));
3265     } else {
3266         TRACEME(("store_lhash (0x%" UVxf ")", PTR2UV(hv)));
3267     }
3268     TRACEME(("size = %" UVuf ", used = %" UVuf, len, (UV)HvUSEDKEYS(hv)));
3269 
3270     TRACEME(("recur_depth %" IVdf ", recur_sv (0x%" UVxf ")", cxt->recur_depth,
3271              PTR2UV(cxt->recur_sv)));
3272     if (recur_sv != (SV*)hv && cxt->max_recur_depth_hash != -1) {
3273         ++cxt->recur_depth;
3274     }
3275     if (RECURSION_TOO_DEEP_HASH()) {
3276 #if PERL_VERSION_LT(5,15,0)
3277         cleanup_recursive_data(aTHX_ (SV*)hv);
3278 #endif
3279         CROAK((MAX_DEPTH_ERROR));
3280     }
3281 
3282     array = HvARRAY(hv);
3283     for (i = 0; i <= (Size_t)HvMAX(hv); i++) {
3284         HE* entry = array[i];
3285         if (!entry) continue;
3286         if ((ret = store_hentry(aTHX_ cxt, hv, ix++, entry, hash_flags)))
3287             return ret;
3288         while ((entry = HeNEXT(entry))) {
3289             if ((ret = store_hentry(aTHX_ cxt, hv, ix++, entry, hash_flags)))
3290                 return ret;
3291         }
3292     }
3293     if (recur_sv == (SV*)hv && cxt->max_recur_depth_hash != -1 && cxt->recur_depth > 0) {
3294         TRACEME(("recur_depth --%" IVdf, cxt->recur_depth));
3295         --cxt->recur_depth;
3296     }
3297     assert(ix == len);
3298     return ret;
3299 }
3300 #endif
3301 
3302 /*
3303  * store_code
3304  *
3305  * Store a code reference.
3306  *
3307  * Layout is SX_CODE <length> followed by a scalar containing the perl
3308  * source code of the code reference.
3309  */
store_code(pTHX_ stcxt_t * cxt,CV * cv)3310 static int store_code(pTHX_ stcxt_t *cxt, CV *cv)
3311 {
3312 #if PERL_VERSION_LT(5,6,0)
3313     /*
3314      * retrieve_code does not work with perl 5.005 or less
3315      */
3316     return store_other(aTHX_ cxt, (SV*)cv);
3317 #else
3318     dSP;
3319     STRLEN len;
3320     STRLEN count, reallen;
3321     SV *text, *bdeparse;
3322 
3323     TRACEME(("store_code (0x%" UVxf ")", PTR2UV(cv)));
3324 
3325     if (
3326         cxt->deparse == 0 ||
3327         (cxt->deparse < 0 &&
3328          !(cxt->deparse =
3329            SvTRUE(get_sv("Storable::Deparse", GV_ADD)) ? 1 : 0))
3330 	) {
3331         return store_other(aTHX_ cxt, (SV*)cv);
3332     }
3333 
3334     /*
3335      * Require B::Deparse. At least B::Deparse 0.61 is needed for
3336      * blessed code references.
3337      */
3338     /* Ownership of both SVs is passed to load_module, which frees them. */
3339     load_module(PERL_LOADMOD_NOIMPORT, newSVpvs("B::Deparse"), newSVnv(0.61));
3340     SPAGAIN;
3341 
3342     ENTER;
3343     SAVETMPS;
3344 
3345     /*
3346      * create the B::Deparse object
3347      */
3348 
3349     PUSHMARK(sp);
3350     XPUSHs(newSVpvs_flags("B::Deparse", SVs_TEMP));
3351     PUTBACK;
3352     count = call_method("new", G_SCALAR);
3353     SPAGAIN;
3354     if (count != 1)
3355         CROAK(("Unexpected return value from B::Deparse::new\n"));
3356     bdeparse = POPs;
3357 
3358     /*
3359      * call the coderef2text method
3360      */
3361 
3362     PUSHMARK(sp);
3363     XPUSHs(bdeparse); /* XXX is this already mortal? */
3364     XPUSHs(sv_2mortal(newRV_inc((SV*)cv)));
3365     PUTBACK;
3366     count = call_method("coderef2text", G_SCALAR);
3367     SPAGAIN;
3368     if (count != 1)
3369         CROAK(("Unexpected return value from B::Deparse::coderef2text\n"));
3370 
3371     text = POPs;
3372     len = SvCUR(text);
3373     reallen = strlen(SvPV_nolen(text));
3374 
3375     /*
3376      * Empty code references or XS functions are deparsed as
3377      * "(prototype) ;" or ";".
3378      */
3379 
3380     if (len == 0 || *(SvPV_nolen(text)+reallen-1) == ';') {
3381         CROAK(("The result of B::Deparse::coderef2text was empty - maybe you're trying to serialize an XS function?\n"));
3382     }
3383 
3384     /*
3385      * Signal code by emitting SX_CODE.
3386      */
3387 
3388     PUTMARK(SX_CODE);
3389     cxt->tagnum++;   /* necessary, as SX_CODE is a SEEN() candidate */
3390     TRACEME(("size = %d", (int)len));
3391     TRACEME(("code = %s", SvPV_nolen(text)));
3392 
3393     /*
3394      * Now store the source code.
3395      */
3396 
3397     if(SvUTF8 (text))
3398         STORE_UTF8STR(SvPV_nolen(text), len);
3399     else
3400         STORE_SCALAR(SvPV_nolen(text), len);
3401 
3402     FREETMPS;
3403     LEAVE;
3404 
3405     TRACEME(("ok (code)"));
3406 
3407     return 0;
3408 #endif
3409 }
3410 
3411 #if PERL_VERSION_LT(5,8,0)
3412 #   define PERL_MAGIC_qr                  'r' /* precompiled qr// regex */
3413 #   define BFD_Svs_SMG_OR_RMG SVs_RMG
3414 #elif PERL_VERSION_GE(5,8,1)
3415 #   define BFD_Svs_SMG_OR_RMG SVs_SMG
3416 #   define MY_PLACEHOLDER PL_sv_placeholder
3417 #else
3418 #   define BFD_Svs_SMG_OR_RMG SVs_RMG
3419 #   define MY_PLACEHOLDER PL_sv_undef
3420 #endif
3421 
get_regexp(pTHX_ stcxt_t * cxt,SV * sv,SV ** re,SV ** flags)3422 static int get_regexp(pTHX_ stcxt_t *cxt, SV* sv, SV **re, SV **flags) {
3423     dSP;
3424     SV* rv;
3425 #if PERL_VERSION_GE(5,12,0)
3426     CV *cv = get_cv("re::regexp_pattern", 0);
3427 #else
3428     CV *cv = get_cv("Storable::_regexp_pattern", 0);
3429 #endif
3430     I32 count;
3431 
3432     assert(cv);
3433 
3434     ENTER;
3435     SAVETMPS;
3436     rv = sv_2mortal((SV*)newRV_inc(sv));
3437     PUSHMARK(sp);
3438     XPUSHs(rv);
3439     PUTBACK;
3440     /* optimize to call the XS directly later */
3441     count = call_sv((SV*)cv, G_ARRAY);
3442     SPAGAIN;
3443     if (count < 2)
3444       CROAK(("re::regexp_pattern returned only %d results", (int)count));
3445     *flags = POPs;
3446     SvREFCNT_inc(*flags);
3447     *re = POPs;
3448     SvREFCNT_inc(*re);
3449 
3450     PUTBACK;
3451     FREETMPS;
3452     LEAVE;
3453 
3454     return 1;
3455 }
3456 
store_regexp(pTHX_ stcxt_t * cxt,SV * sv)3457 static int store_regexp(pTHX_ stcxt_t *cxt, SV *sv) {
3458     SV *re = NULL;
3459     SV *flags = NULL;
3460     const char *re_pv;
3461     const char *flags_pv;
3462     STRLEN re_len;
3463     STRLEN flags_len;
3464     U8 op_flags = 0;
3465 
3466     if (!get_regexp(aTHX_ cxt, sv, &re, &flags))
3467       return -1;
3468 
3469     re_pv = SvPV(re, re_len);
3470     flags_pv = SvPV(flags, flags_len);
3471 
3472     if (re_len > 0xFF) {
3473       op_flags |= SHR_U32_RE_LEN;
3474     }
3475 
3476     PUTMARK(SX_REGEXP);
3477     PUTMARK(op_flags);
3478     if (op_flags & SHR_U32_RE_LEN) {
3479       U32 re_len32 = re_len;
3480       WLEN(re_len32);
3481     }
3482     else
3483       PUTMARK(re_len);
3484     WRITE(re_pv, re_len);
3485     PUTMARK(flags_len);
3486     WRITE(flags_pv, flags_len);
3487 
3488     return 0;
3489 }
3490 
3491 /*
3492  * store_tied
3493  *
3494  * When storing a tied object (be it a tied scalar, array or hash), we lay out
3495  * a special mark, followed by the underlying tied object. For instance, when
3496  * dealing with a tied hash, we store SX_TIED_HASH <hash object>, where
3497  * <hash object> stands for the serialization of the tied hash.
3498  */
store_tied(pTHX_ stcxt_t * cxt,SV * sv)3499 static int store_tied(pTHX_ stcxt_t *cxt, SV *sv)
3500 {
3501     MAGIC *mg;
3502     SV *obj = NULL;
3503     int ret = 0;
3504     int svt = SvTYPE(sv);
3505     char mtype = 'P';
3506 
3507     TRACEME(("store_tied (0x%" UVxf ")", PTR2UV(sv)));
3508 
3509     /*
3510      * We have a small run-time penalty here because we chose to factorise
3511      * all tieds objects into the same routine, and not have a store_tied_hash,
3512      * a store_tied_array, etc...
3513      *
3514      * Don't use a switch() statement, as most compilers don't optimize that
3515      * well for 2/3 values. An if() else if() cascade is just fine. We put
3516      * tied hashes first, as they are the most likely beasts.
3517      */
3518 
3519     if (svt == SVt_PVHV) {
3520         TRACEME(("tied hash"));
3521         PUTMARK(SX_TIED_HASH);		/* Introduces tied hash */
3522     } else if (svt == SVt_PVAV) {
3523         TRACEME(("tied array"));
3524         PUTMARK(SX_TIED_ARRAY);		/* Introduces tied array */
3525     } else {
3526         TRACEME(("tied scalar"));
3527         PUTMARK(SX_TIED_SCALAR);	/* Introduces tied scalar */
3528         mtype = 'q';
3529     }
3530 
3531     if (!(mg = mg_find(sv, mtype)))
3532         CROAK(("No magic '%c' found while storing tied %s", mtype,
3533                (svt == SVt_PVHV) ? "hash" :
3534                (svt == SVt_PVAV) ? "array" : "scalar"));
3535 
3536     /*
3537      * The mg->mg_obj found by mg_find() above actually points to the
3538      * underlying tied Perl object implementation. For instance, if the
3539      * original SV was that of a tied array, then mg->mg_obj is an AV.
3540      *
3541      * Note that we store the Perl object as-is. We don't call its FETCH
3542      * method along the way. At retrieval time, we won't call its STORE
3543      * method either, but the tieing magic will be re-installed. In itself,
3544      * that ensures that the tieing semantics are preserved since further
3545      * accesses on the retrieved object will indeed call the magic methods...
3546      */
3547 
3548     /* [#17040] mg_obj is NULL for scalar self-ties. AMS 20030416 */
3549     obj = mg->mg_obj ? mg->mg_obj : newSV(0);
3550     if ((ret = store(aTHX_ cxt, obj)))
3551         return ret;
3552 
3553     TRACEME(("ok (tied)"));
3554 
3555     return 0;
3556 }
3557 
3558 /*
3559  * store_tied_item
3560  *
3561  * Stores a reference to an item within a tied structure:
3562  *
3563  *  . \$h{key}, stores both the (tied %h) object and 'key'.
3564  *  . \$a[idx], stores both the (tied @a) object and 'idx'.
3565  *
3566  * Layout is therefore either:
3567  *     SX_TIED_KEY <object> <key>
3568  *     SX_TIED_IDX <object> <index>
3569  */
store_tied_item(pTHX_ stcxt_t * cxt,SV * sv)3570 static int store_tied_item(pTHX_ stcxt_t *cxt, SV *sv)
3571 {
3572     MAGIC *mg;
3573     int ret;
3574 
3575     TRACEME(("store_tied_item (0x%" UVxf ")", PTR2UV(sv)));
3576 
3577     if (!(mg = mg_find(sv, 'p')))
3578         CROAK(("No magic 'p' found while storing reference to tied item"));
3579 
3580     /*
3581      * We discriminate between \$h{key} and \$a[idx] via mg_ptr.
3582      */
3583 
3584     if (mg->mg_ptr) {
3585         TRACEME(("store_tied_item: storing a ref to a tied hash item"));
3586         PUTMARK(SX_TIED_KEY);
3587         TRACEME(("store_tied_item: storing OBJ 0x%" UVxf, PTR2UV(mg->mg_obj)));
3588 
3589         if ((ret = store(aTHX_ cxt, mg->mg_obj)))	/* Extra () for -Wall, grr... */
3590             return ret;
3591 
3592         TRACEME(("store_tied_item: storing PTR 0x%" UVxf, PTR2UV(mg->mg_ptr)));
3593 
3594         if ((ret = store(aTHX_ cxt, (SV *) mg->mg_ptr))) /* Idem, for -Wall */
3595             return ret;
3596     } else {
3597         I32 idx = mg->mg_len;
3598 
3599         TRACEME(("store_tied_item: storing a ref to a tied array item "));
3600         PUTMARK(SX_TIED_IDX);
3601         TRACEME(("store_tied_item: storing OBJ 0x%" UVxf, PTR2UV(mg->mg_obj)));
3602 
3603         if ((ret = store(aTHX_ cxt, mg->mg_obj)))	/* Idem, for -Wall */
3604             return ret;
3605 
3606         TRACEME(("store_tied_item: storing IDX %d", (int)idx));
3607 
3608         WLEN(idx);
3609     }
3610 
3611     TRACEME(("ok (tied item)"));
3612 
3613     return 0;
3614 }
3615 
3616 /*
3617  * store_hook		-- dispatched manually, not via sv_store[]
3618  *
3619  * The blessed SV is serialized by a hook.
3620  *
3621  * Simple Layout is:
3622  *
3623  *     SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
3624  *
3625  * where <flags> indicates how long <len>, <len2> and <len3> are, whether
3626  * the trailing part [] is present, the type of object (scalar, array or hash).
3627  * There is also a bit which says how the classname is stored between:
3628  *
3629  *     <len> <classname>
3630  *     <index>
3631  *
3632  * and when the <index> form is used (classname already seen), the "large
3633  * classname" bit in <flags> indicates how large the <index> is.
3634  *
3635  * The serialized string returned by the hook is of length <len2> and comes
3636  * next.  It is an opaque string for us.
3637  *
3638  * Those <len3> object IDs which are listed last represent the extra references
3639  * not directly serialized by the hook, but which are linked to the object.
3640  *
3641  * When recursion is mandated to resolve object-IDs not yet seen, we have
3642  * instead, with <header> being flags with bits set to indicate the object type
3643  * and that recursion was indeed needed:
3644  *
3645  *     SX_HOOK <header> <object> <header> <object> <flags>
3646  *
3647  * that same header being repeated between serialized objects obtained through
3648  * recursion, until we reach flags indicating no recursion, at which point
3649  * we know we've resynchronized with a single layout, after <flags>.
3650  *
3651  * When storing a blessed ref to a tied variable, the following format is
3652  * used:
3653  *
3654  *     SX_HOOK <flags> <extra> ... [<len3> <object-IDs>] <magic object>
3655  *
3656  * The first <flags> indication carries an object of type SHT_EXTRA, and the
3657  * real object type is held in the <extra> flag.  At the very end of the
3658  * serialization stream, the underlying magic object is serialized, just like
3659  * any other tied variable.
3660  */
store_hook(pTHX_ stcxt_t * cxt,SV * sv,int type,HV * pkg,SV * hook)3661 static int store_hook(
3662                       pTHX_
3663                       stcxt_t *cxt,
3664                       SV *sv,
3665                       int type,
3666                       HV *pkg,
3667                       SV *hook)
3668 {
3669     I32 len;
3670     char *classname;
3671     STRLEN len2;
3672     SV *ref;
3673     AV *av;
3674     SV **ary;
3675     IV count;			/* really len3 + 1 */
3676     unsigned char flags;
3677     char *pv;
3678     int i;
3679     int recursed = 0;		/* counts recursion */
3680     int obj_type;		/* object type, on 2 bits */
3681     I32 classnum;
3682     int ret;
3683     int clone = cxt->optype & ST_CLONE;
3684     char mtype = '\0';		/* for blessed ref to tied structures */
3685     unsigned char eflags = '\0'; /* used when object type is SHT_EXTRA */
3686 #ifdef HAS_U64
3687     int need_large_oids = 0;
3688 #endif
3689 
3690     TRACEME(("store_hook, classname \"%s\", tagged #%d", HvNAME_get(pkg), (int)cxt->tagnum));
3691 
3692     /*
3693      * Determine object type on 2 bits.
3694      */
3695 
3696     switch (type) {
3697     case svis_REF:
3698     case svis_SCALAR:
3699         obj_type = SHT_SCALAR;
3700         break;
3701     case svis_ARRAY:
3702         obj_type = SHT_ARRAY;
3703         break;
3704     case svis_HASH:
3705         obj_type = SHT_HASH;
3706         break;
3707     case svis_TIED:
3708         /*
3709          * Produced by a blessed ref to a tied data structure, $o in the
3710          * following Perl code.
3711          *
3712          * 	my %h;
3713          *  tie %h, 'FOO';
3714          *	my $o = bless \%h, 'BAR';
3715          *
3716          * Signal the tie-ing magic by setting the object type as SHT_EXTRA
3717          * (since we have only 2 bits in <flags> to store the type), and an
3718          * <extra> byte flag will be emitted after the FIRST <flags> in the
3719          * stream, carrying what we put in 'eflags'.
3720          */
3721         obj_type = SHT_EXTRA;
3722         switch (SvTYPE(sv)) {
3723         case SVt_PVHV:
3724             eflags = (unsigned char) SHT_THASH;
3725             mtype = 'P';
3726             break;
3727         case SVt_PVAV:
3728             eflags = (unsigned char) SHT_TARRAY;
3729             mtype = 'P';
3730             break;
3731         default:
3732             eflags = (unsigned char) SHT_TSCALAR;
3733             mtype = 'q';
3734             break;
3735         }
3736         break;
3737     default:
3738         CROAK(("Unexpected object type (%d) in store_hook()", type));
3739     }
3740     flags = SHF_NEED_RECURSE | obj_type;
3741 
3742     classname = HvNAME_get(pkg);
3743     len = strlen(classname);
3744 
3745     /*
3746      * To call the hook, we need to fake a call like:
3747      *
3748      *    $object->STORABLE_freeze($cloning);
3749      *
3750      * but we don't have the $object here.  For instance, if $object is
3751      * a blessed array, what we have in 'sv' is the array, and we can't
3752      * call a method on those.
3753      *
3754      * Therefore, we need to create a temporary reference to the object and
3755      * make the call on that reference.
3756      */
3757 
3758     TRACEME(("about to call STORABLE_freeze on class %s", classname));
3759 
3760     ref = newRV_inc(sv);		/* Temporary reference */
3761     av = array_call(aTHX_ ref, hook, clone); /* @a = $object->STORABLE_freeze($c) */
3762     SvREFCNT_dec(ref);			/* Reclaim temporary reference */
3763 
3764     count = AvFILLp(av) + 1;
3765     TRACEME(("store_hook, array holds %" IVdf " items", count));
3766 
3767     /*
3768      * If they return an empty list, it means they wish to ignore the
3769      * hook for this class (and not just this instance -- that's for them
3770      * to handle if they so wish).
3771      *
3772      * Simply disable the cached entry for the hook (it won't be recomputed
3773      * since it's present in the cache) and recurse to store_blessed().
3774      */
3775 
3776     if (!count) {
3777         /* free empty list returned by the hook */
3778         av_undef(av);
3779         sv_free((SV *) av);
3780 
3781         /*
3782          * They must not change their mind in the middle of a serialization.
3783          */
3784 
3785         if (hv_fetch(cxt->hclass, classname, len, FALSE))
3786             CROAK(("Too late to ignore hooks for %s class \"%s\"",
3787                    (cxt->optype & ST_CLONE) ? "cloning" : "storing",
3788                    classname));
3789 
3790         pkg_hide(aTHX_ cxt->hook, pkg, "STORABLE_freeze");
3791 
3792         ASSERT(!pkg_can(aTHX_ cxt->hook, pkg, "STORABLE_freeze"),
3793                ("hook invisible"));
3794         TRACEME(("ignoring STORABLE_freeze in class \"%s\"", classname));
3795 
3796         return store_blessed(aTHX_ cxt, sv, type, pkg);
3797     }
3798 
3799     /*
3800      * Get frozen string.
3801      */
3802 
3803     ary = AvARRAY(av);
3804     pv = SvPV(ary[0], len2);
3805     /* We can't use pkg_can here because it only caches one method per
3806      * package */
3807     {
3808         GV* gv = gv_fetchmethod_autoload(pkg, "STORABLE_attach", FALSE);
3809         if (gv && isGV(gv)) {
3810             if (count > 1)
3811                 CROAK(("Freeze cannot return references if %s class is using STORABLE_attach", classname));
3812             goto check_done;
3813         }
3814     }
3815 
3816 #ifdef HAS_U64
3817     if (count > I32_MAX) {
3818 	CROAK(("Too many references returned by STORABLE_freeze()"));
3819     }
3820 #endif
3821 
3822     /*
3823      * If they returned more than one item, we need to serialize some
3824      * extra references if not already done.
3825      *
3826      * Loop over the array, starting at position #1, and for each item,
3827      * ensure it is a reference, serialize it if not already done, and
3828      * replace the entry with the tag ID of the corresponding serialized
3829      * object.
3830      *
3831      * We CHEAT by not calling av_fetch() and read directly within the
3832      * array, for speed.
3833      */
3834 
3835     for (i = 1; i < count; i++) {
3836 #ifdef USE_PTR_TABLE
3837         char *fake_tag;
3838 #else
3839         SV **svh;
3840 #endif
3841         SV *rsv = ary[i];
3842         SV *xsv;
3843         SV *tag;
3844         AV *av_hook = cxt->hook_seen;
3845 
3846         if (!SvROK(rsv))
3847             CROAK(("Item #%d returned by STORABLE_freeze "
3848                    "for %s is not a reference", (int)i, classname));
3849         xsv = SvRV(rsv);	/* Follow ref to know what to look for */
3850 
3851         /*
3852          * Look in hseen and see if we have a tag already.
3853          * Serialize entry if not done already, and get its tag.
3854          */
3855 
3856 #ifdef USE_PTR_TABLE
3857         /* Fakery needed because ptr_table_fetch returns zero for a
3858            failure, whereas the existing code assumes that it can
3859            safely store a tag zero. So for ptr_tables we store tag+1
3860         */
3861         if ((fake_tag = (char *)ptr_table_fetch(cxt->pseen, xsv)))
3862             goto sv_seen;	/* Avoid moving code too far to the right */
3863 #else
3864         if ((svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE)))
3865             goto sv_seen;	/* Avoid moving code too far to the right */
3866 #endif
3867 
3868         TRACEME(("listed object %d at 0x%" UVxf " is unknown", i-1,
3869                  PTR2UV(xsv)));
3870 
3871         /*
3872          * We need to recurse to store that object and get it to be known
3873          * so that we can resolve the list of object-IDs at retrieve time.
3874          *
3875          * The first time we do this, we need to emit the proper header
3876          * indicating that we recursed, and what the type of object is (the
3877          * object we're storing via a user-hook).  Indeed, during retrieval,
3878          * we'll have to create the object before recursing to retrieve the
3879          * others, in case those would point back at that object.
3880          */
3881 
3882         /* [SX_HOOK] <flags> [<extra>] <object>*/
3883         if (!recursed++) {
3884 #ifdef HAS_U64
3885             if (len2 > INT32_MAX)
3886                 PUTMARK(SX_LOBJECT);
3887 #endif
3888 	    PUTMARK(SX_HOOK);
3889             PUTMARK(flags);
3890             if (obj_type == SHT_EXTRA)
3891                 PUTMARK(eflags);
3892         } else
3893             PUTMARK(flags);
3894 
3895         if ((ret = store(aTHX_ cxt, xsv)))	/* Given by hook for us to store */
3896             return ret;
3897 
3898 #ifdef USE_PTR_TABLE
3899         fake_tag = (char *)ptr_table_fetch(cxt->pseen, xsv);
3900         if (!fake_tag)
3901             CROAK(("Could not serialize item #%d from hook in %s",
3902                    (int)i, classname));
3903 #else
3904         svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE);
3905         if (!svh)
3906             CROAK(("Could not serialize item #%d from hook in %s",
3907                    (int)i, classname));
3908 #endif
3909         /*
3910          * It was the first time we serialized 'xsv'.
3911          *
3912          * Keep this SV alive until the end of the serialization: if we
3913          * disposed of it right now by decrementing its refcount, and it was
3914          * a temporary value, some next temporary value allocated during
3915          * another STORABLE_freeze might take its place, and we'd wrongly
3916          * assume that new SV was already serialized, based on its presence
3917          * in cxt->hseen.
3918          *
3919          * Therefore, push it away in cxt->hook_seen.
3920          */
3921 
3922         av_store(av_hook, AvFILLp(av_hook)+1, SvREFCNT_inc(xsv));
3923 
3924     sv_seen:
3925         /*
3926          * Dispose of the REF they returned.  If we saved the 'xsv' away
3927          * in the array of returned SVs, that will not cause the underlying
3928          * referenced SV to be reclaimed.
3929          */
3930 
3931         ASSERT(SvREFCNT(xsv) > 1, ("SV will survive disposal of its REF"));
3932         SvREFCNT_dec(rsv);		/* Dispose of reference */
3933 
3934         /*
3935          * Replace entry with its tag (not a real SV, so no refcnt increment)
3936          */
3937 
3938 #ifdef USE_PTR_TABLE
3939         tag = (SV *)--fake_tag;
3940 #else
3941         tag = *svh;
3942 #endif
3943         ary[i] = tag;
3944         TRACEME(("listed object %d at 0x%" UVxf " is tag #%" UVuf,
3945                  i-1, PTR2UV(xsv), PTR2UV(tag)));
3946 #ifdef HAS_U64
3947        if ((U32)PTR2TAG(tag) != PTR2TAG(tag))
3948            need_large_oids = 1;
3949 #endif
3950     }
3951 
3952     /*
3953      * Allocate a class ID if not already done.
3954      *
3955      * This needs to be done after the recursion above, since at retrieval
3956      * time, we'll see the inner objects first.  Many thanks to
3957      * Salvador Ortiz Garcia <sog@msg.com.mx> who spot that bug and
3958      * proposed the right fix.  -- RAM, 15/09/2000
3959      */
3960 
3961  check_done:
3962     if (!known_class(aTHX_ cxt, classname, len, &classnum)) {
3963         TRACEME(("first time we see class %s, ID = %d", classname, (int)classnum));
3964         classnum = -1;			/* Mark: we must store classname */
3965     } else {
3966         TRACEME(("already seen class %s, ID = %d", classname, (int)classnum));
3967     }
3968 
3969     /*
3970      * Compute leading flags.
3971      */
3972 
3973     flags = obj_type;
3974     if (((classnum == -1) ? len : classnum) > LG_SCALAR)
3975         flags |= SHF_LARGE_CLASSLEN;
3976     if (classnum != -1)
3977         flags |= SHF_IDX_CLASSNAME;
3978     if (len2 > LG_SCALAR)
3979         flags |= SHF_LARGE_STRLEN;
3980     if (count > 1)
3981         flags |= SHF_HAS_LIST;
3982     if (count > (LG_SCALAR + 1))
3983         flags |= SHF_LARGE_LISTLEN;
3984 #ifdef HAS_U64
3985     if (need_large_oids)
3986         flags |= SHF_LARGE_LISTLEN;
3987 #endif
3988 
3989     /*
3990      * We're ready to emit either serialized form:
3991      *
3992      *   SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
3993      *   SX_HOOK <flags> <index>           <len2> <str> [<len3> <object-IDs>]
3994      *
3995      * If we recursed, the SX_HOOK has already been emitted.
3996      */
3997 
3998     TRACEME(("SX_HOOK (recursed=%d) flags=0x%x "
3999              "class=%" IVdf " len=%" IVdf " len2=%" IVdf " len3=%" IVdf,
4000              recursed, flags, (IV)classnum, (IV)len, (IV)len2, count-1));
4001 
4002     /* SX_HOOK <flags> [<extra>] */
4003     if (!recursed) {
4004 #ifdef HAS_U64
4005         if (len2 > INT32_MAX)
4006 	    PUTMARK(SX_LOBJECT);
4007 #endif
4008 	PUTMARK(SX_HOOK);
4009         PUTMARK(flags);
4010         if (obj_type == SHT_EXTRA)
4011             PUTMARK(eflags);
4012     } else
4013         PUTMARK(flags);
4014 
4015     /* <len> <classname> or <index> */
4016     if (flags & SHF_IDX_CLASSNAME) {
4017         if (flags & SHF_LARGE_CLASSLEN)
4018             WLEN(classnum);
4019         else {
4020             unsigned char cnum = (unsigned char) classnum;
4021             PUTMARK(cnum);
4022         }
4023     } else {
4024         if (flags & SHF_LARGE_CLASSLEN)
4025             WLEN(len);
4026         else {
4027             unsigned char clen = (unsigned char) len;
4028             PUTMARK(clen);
4029         }
4030         WRITE(classname, len);		/* Final \0 is omitted */
4031     }
4032 
4033     /* <len2> <frozen-str> */
4034 #ifdef HAS_U64
4035     if (len2 > INT32_MAX) {
4036         W64LEN(len2);
4037     }
4038     else
4039 #endif
4040     if (flags & SHF_LARGE_STRLEN) {
4041         U32 wlen2 = len2;		/* STRLEN might be 8 bytes */
4042         WLEN(wlen2);			/* Must write an I32 for 64-bit machines */
4043     } else {
4044         unsigned char clen = (unsigned char) len2;
4045         PUTMARK(clen);
4046     }
4047     if (len2)
4048         WRITE(pv, (SSize_t)len2);	/* Final \0 is omitted */
4049 
4050     /* [<len3> <object-IDs>] */
4051     if (flags & SHF_HAS_LIST) {
4052         int len3 = count - 1;
4053         if (flags & SHF_LARGE_LISTLEN) {
4054 #ifdef HAS_U64
4055   	    int tlen3 = need_large_oids ? -len3 : len3;
4056 	    WLEN(tlen3);
4057 #else
4058             WLEN(len3);
4059 #endif
4060 	}
4061         else {
4062             unsigned char clen = (unsigned char) len3;
4063             PUTMARK(clen);
4064         }
4065 
4066         /*
4067          * NOTA BENE, for 64-bit machines: the ary[i] below does not yield a
4068          * real pointer, rather a tag number, well under the 32-bit limit.
4069          * Which is wrong... if we have more than 2**32 SVs we can get ids over
4070          * the 32-bit limit.
4071          */
4072 
4073         for (i = 1; i < count; i++) {
4074 #ifdef HAS_U64
4075             if (need_large_oids) {
4076                 ntag_t tag = PTR2TAG(ary[i]);
4077                 W64LEN(tag);
4078                 TRACEME(("object %d, tag #%" UVuf, i-1, (UV)tag));
4079             }
4080             else
4081 #endif
4082             {
4083                 I32 tagval = htonl(LOW_32BITS(ary[i]));
4084                 WRITE_I32(tagval);
4085                 TRACEME(("object %d, tag #%d", i-1, ntohl(tagval)));
4086             }
4087         }
4088     }
4089 
4090     /*
4091      * Free the array.  We need extra care for indices after 0, since they
4092      * don't hold real SVs but integers cast.
4093      */
4094 
4095     if (count > 1)
4096         AvFILLp(av) = 0;	/* Cheat, nothing after 0 interests us */
4097     av_undef(av);
4098     sv_free((SV *) av);
4099 
4100     /*
4101      * If object was tied, need to insert serialization of the magic object.
4102      */
4103 
4104     if (obj_type == SHT_EXTRA) {
4105         MAGIC *mg;
4106 
4107         if (!(mg = mg_find(sv, mtype))) {
4108             int svt = SvTYPE(sv);
4109             CROAK(("No magic '%c' found while storing ref to tied %s with hook",
4110                    mtype, (svt == SVt_PVHV) ? "hash" :
4111                    (svt == SVt_PVAV) ? "array" : "scalar"));
4112         }
4113 
4114         TRACEME(("handling the magic object 0x%" UVxf " part of 0x%" UVxf,
4115                  PTR2UV(mg->mg_obj), PTR2UV(sv)));
4116 
4117         /*
4118          * [<magic object>]
4119          */
4120         if ((ret = store(aTHX_ cxt, mg->mg_obj)))
4121             return ret;
4122     }
4123 
4124     return 0;
4125 }
4126 
4127 /*
4128  * store_blessed	-- dispatched manually, not via sv_store[]
4129  *
4130  * Check whether there is a STORABLE_xxx hook defined in the class or in one
4131  * of its ancestors.  If there is, then redispatch to store_hook();
4132  *
4133  * Otherwise, the blessed SV is stored using the following layout:
4134  *
4135  *    SX_BLESS <flag> <len> <classname> <object>
4136  *
4137  * where <flag> indicates whether <len> is stored on 0 or 4 bytes, depending
4138  * on the high-order bit in flag: if 1, then length follows on 4 bytes.
4139  * Otherwise, the low order bits give the length, thereby giving a compact
4140  * representation for class names less than 127 chars long.
4141  *
4142  * Each <classname> seen is remembered and indexed, so that the next time
4143  * an object in the blessed in the same <classname> is stored, the following
4144  * will be emitted:
4145  *
4146  *    SX_IX_BLESS <flag> <index> <object>
4147  *
4148  * where <index> is the classname index, stored on 0 or 4 bytes depending
4149  * on the high-order bit in flag (same encoding as above for <len>).
4150  */
store_blessed(pTHX_ stcxt_t * cxt,SV * sv,int type,HV * pkg)4151 static int store_blessed(
4152                          pTHX_
4153                          stcxt_t *cxt,
4154                          SV *sv,
4155                          int type,
4156                          HV *pkg)
4157 {
4158     SV *hook;
4159     char *classname;
4160     I32 len;
4161     I32 classnum;
4162 
4163     TRACEME(("store_blessed, type %d, class \"%s\"", type, HvNAME_get(pkg)));
4164 
4165     /*
4166      * Look for a hook for this blessed SV and redirect to store_hook()
4167      * if needed.
4168      */
4169 
4170     hook = pkg_can(aTHX_ cxt->hook, pkg, "STORABLE_freeze");
4171     if (hook)
4172         return store_hook(aTHX_ cxt, sv, type, pkg, hook);
4173 
4174     /*
4175      * This is a blessed SV without any serialization hook.
4176      */
4177 
4178     classname = HvNAME_get(pkg);
4179     len = strlen(classname);
4180 
4181     TRACEME(("blessed 0x%" UVxf " in %s, no hook: tagged #%d",
4182              PTR2UV(sv), classname, (int)cxt->tagnum));
4183 
4184     /*
4185      * Determine whether it is the first time we see that class name (in which
4186      * case it will be stored in the SX_BLESS form), or whether we already
4187      * saw that class name before (in which case the SX_IX_BLESS form will be
4188      * used).
4189      */
4190 
4191     if (known_class(aTHX_ cxt, classname, len, &classnum)) {
4192         TRACEME(("already seen class %s, ID = %d", classname, (int)classnum));
4193         PUTMARK(SX_IX_BLESS);
4194         if (classnum <= LG_BLESS) {
4195             unsigned char cnum = (unsigned char) classnum;
4196             PUTMARK(cnum);
4197         } else {
4198             unsigned char flag = (unsigned char) 0x80;
4199             PUTMARK(flag);
4200             WLEN(classnum);
4201         }
4202     } else {
4203         TRACEME(("first time we see class %s, ID = %d", classname,
4204                  (int)classnum));
4205         PUTMARK(SX_BLESS);
4206         if (len <= LG_BLESS) {
4207             unsigned char clen = (unsigned char) len;
4208             PUTMARK(clen);
4209         } else {
4210             unsigned char flag = (unsigned char) 0x80;
4211             PUTMARK(flag);
4212             WLEN(len);	/* Don't BER-encode, this should be rare */
4213         }
4214         WRITE(classname, len);	/* Final \0 is omitted */
4215     }
4216 
4217     /*
4218      * Now emit the <object> part.
4219      */
4220 
4221     return SV_STORE(type)(aTHX_ cxt, sv);
4222 }
4223 
4224 /*
4225  * store_other
4226  *
4227  * We don't know how to store the item we reached, so return an error condition.
4228  * (it's probably a GLOB, some CODE reference, etc...)
4229  *
4230  * If they defined the 'forgive_me' variable at the Perl level to some
4231  * true value, then don't croak, just warn, and store a placeholder string
4232  * instead.
4233  */
store_other(pTHX_ stcxt_t * cxt,SV * sv)4234 static int store_other(pTHX_ stcxt_t *cxt, SV *sv)
4235 {
4236     STRLEN len;
4237     char buf[80];
4238 
4239     TRACEME(("store_other"));
4240 
4241     /*
4242      * Fetch the value from perl only once per store() operation.
4243      */
4244 
4245     if (
4246         cxt->forgive_me == 0 ||
4247         (cxt->forgive_me < 0 &&
4248          !(cxt->forgive_me = SvTRUE
4249            (get_sv("Storable::forgive_me", GV_ADD)) ? 1 : 0))
4250 	)
4251         CROAK(("Can't store %s items", sv_reftype(sv, FALSE)));
4252 
4253     warn("Can't store item %s(0x%" UVxf ")",
4254          sv_reftype(sv, FALSE), PTR2UV(sv));
4255 
4256     /*
4257      * Store placeholder string as a scalar instead...
4258      */
4259 
4260     (void) sprintf(buf, "You lost %s(0x%" UVxf ")%c", sv_reftype(sv, FALSE),
4261                    PTR2UV(sv), (char) 0);
4262 
4263     len = strlen(buf);
4264     if (len < 80)
4265         STORE_SCALAR(buf, len);
4266     TRACEME(("ok (dummy \"%s\", length = %" IVdf ")", buf, (IV) len));
4267 
4268     return 0;
4269 }
4270 
4271 /***
4272  *** Store driving routines
4273  ***/
4274 
4275 /*
4276  * sv_type
4277  *
4278  * WARNING: partially duplicates Perl's sv_reftype for speed.
4279  *
4280  * Returns the type of the SV, identified by an integer. That integer
4281  * may then be used to index the dynamic routine dispatch table.
4282  */
sv_type(pTHX_ SV * sv)4283 static int sv_type(pTHX_ SV *sv)
4284 {
4285     switch (SvTYPE(sv)) {
4286     case SVt_NULL:
4287 #if PERL_VERSION_LT(5,11,0)
4288     case SVt_IV:
4289 #endif
4290     case SVt_NV:
4291         /*
4292          * No need to check for ROK, that can't be set here since there
4293          * is no field capable of hodling the xrv_rv reference.
4294          */
4295         return svis_SCALAR;
4296     case SVt_PV:
4297 #if PERL_VERSION_LT(5,11,0)
4298     case SVt_RV:
4299 #else
4300     case SVt_IV:
4301 #endif
4302     case SVt_PVIV:
4303     case SVt_PVNV:
4304         /*
4305          * Starting from SVt_PV, it is possible to have the ROK flag
4306          * set, the pointer to the other SV being either stored in
4307          * the xrv_rv (in the case of a pure SVt_RV), or as the
4308          * xpv_pv field of an SVt_PV and its heirs.
4309          *
4310          * However, those SV cannot be magical or they would be an
4311          * SVt_PVMG at least.
4312          */
4313         return SvROK(sv) ? svis_REF : svis_SCALAR;
4314     case SVt_PVMG:
4315 #if PERL_VERSION_LT(5,11,0)
4316         if ((SvFLAGS(sv) & (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG))
4317 	          == (SVs_OBJECT|BFD_Svs_SMG_OR_RMG)
4318 	    && mg_find(sv, PERL_MAGIC_qr)) {
4319 	      return svis_REGEXP;
4320 	}
4321 #endif
4322     case SVt_PVLV:		/* Workaround for perl5.004_04 "LVALUE" bug */
4323         if ((SvFLAGS(sv) & (SVs_GMG|SVs_SMG|SVs_RMG)) ==
4324             (SVs_GMG|SVs_SMG|SVs_RMG) &&
4325             (mg_find(sv, 'p')))
4326             return svis_TIED_ITEM;
4327         /* FALL THROUGH */
4328 #if PERL_VERSION_LT(5,9,0)
4329     case SVt_PVBM:
4330 #endif
4331         if ((SvFLAGS(sv) & (SVs_GMG|SVs_SMG|SVs_RMG)) ==
4332             (SVs_GMG|SVs_SMG|SVs_RMG) &&
4333             (mg_find(sv, 'q')))
4334             return svis_TIED;
4335         return SvROK(sv) ? svis_REF : svis_SCALAR;
4336     case SVt_PVAV:
4337         if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
4338             return svis_TIED;
4339         return svis_ARRAY;
4340     case SVt_PVHV:
4341         if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
4342             return svis_TIED;
4343         return svis_HASH;
4344     case SVt_PVCV:
4345         return svis_CODE;
4346 #if PERL_VERSION_GE(5,9,0)
4347 	/* case SVt_INVLIST: */
4348 #endif
4349 #if PERL_VERSION_GE(5,11,0)
4350     case SVt_REGEXP:
4351         return svis_REGEXP;
4352 #endif
4353     default:
4354         break;
4355     }
4356 
4357     return svis_OTHER;
4358 }
4359 
4360 /*
4361  * store
4362  *
4363  * Recursively store objects pointed to by the sv to the specified file.
4364  *
4365  * Layout is <content> or SX_OBJECT <tagnum> if we reach an already stored
4366  * object (one for which storage has started -- it may not be over if we have
4367  * a self-referenced structure). This data set forms a stored <object>.
4368  */
store(pTHX_ stcxt_t * cxt,SV * sv)4369 static int store(pTHX_ stcxt_t *cxt, SV *sv)
4370 {
4371     SV **svh;
4372     int ret;
4373     int type;
4374 #ifdef USE_PTR_TABLE
4375     struct ptr_tbl *pseen = cxt->pseen;
4376 #else
4377     HV *hseen = cxt->hseen;
4378 #endif
4379 
4380     TRACEME(("store (0x%" UVxf ")", PTR2UV(sv)));
4381 
4382     /*
4383      * If object has already been stored, do not duplicate data.
4384      * Simply emit the SX_OBJECT marker followed by its tag data.
4385      * The tag is always written in network order.
4386      *
4387      * NOTA BENE, for 64-bit machines: the "*svh" below does not yield a
4388      * real pointer, rather a tag number (watch the insertion code below).
4389      * That means it probably safe to assume it is well under the 32-bit
4390      * limit, and makes the truncation safe.
4391      *		-- RAM, 14/09/1999
4392      */
4393 
4394 #ifdef USE_PTR_TABLE
4395     svh = (SV **)ptr_table_fetch(pseen, sv);
4396 #else
4397     svh = hv_fetch(hseen, (char *) &sv, sizeof(sv), FALSE);
4398 #endif
4399     if (svh) {
4400 	ntag_t tagval;
4401 	if (sv == &PL_sv_undef) {
4402             /* We have seen PL_sv_undef before, but fake it as
4403                if we have not.
4404 
4405                Not the simplest solution to making restricted
4406                hashes work on 5.8.0, but it does mean that
4407                repeated references to the one true undef will
4408                take up less space in the output file.
4409             */
4410             /* Need to jump past the next hv_store, because on the
4411                second store of undef the old hash value will be
4412                SvREFCNT_dec()ed, and as Storable cheats horribly
4413                by storing non-SVs in the hash a SEGV will ensure.
4414                Need to increase the tag number so that the
4415                receiver has no idea what games we're up to.  This
4416                special casing doesn't affect hooks that store
4417                undef, as the hook routine does its own lookup into
4418                hseen.  Also this means that any references back
4419                to PL_sv_undef (from the pathological case of hooks
4420                storing references to it) will find the seen hash
4421                entry for the first time, as if we didn't have this
4422                hackery here. (That hseen lookup works even on 5.8.0
4423                because it's a key of &PL_sv_undef and a value
4424                which is a tag number, not a value which is
4425                PL_sv_undef.)  */
4426             cxt->tagnum++;
4427             type = svis_SCALAR;
4428             goto undef_special_case;
4429         }
4430 
4431 #ifdef USE_PTR_TABLE
4432 	tagval = PTR2TAG(((char *)svh)-1);
4433 #else
4434 	tagval = PTR2TAG(*svh);
4435 #endif
4436 #ifdef HAS_U64
4437 
4438        /* older versions of Storable streat the tag as a signed value
4439           used in an array lookup, corrupting the data structure.
4440           Ensure only a newer Storable will be able to parse this tag id
4441           if it's over the 2G mark.
4442         */
4443 	if (tagval > I32_MAX) {
4444 
4445 	    TRACEME(("object 0x%" UVxf " seen as #%" UVuf, PTR2UV(sv),
4446 		     (UV)tagval));
4447 
4448 	    PUTMARK(SX_LOBJECT);
4449 	    PUTMARK(SX_OBJECT);
4450 	    W64LEN(tagval);
4451 	    return 0;
4452 	}
4453 	else
4454 #endif
4455 	{
4456 	    I32 ltagval;
4457 
4458 	    ltagval = htonl((I32)tagval);
4459 
4460 	    TRACEME(("object 0x%" UVxf " seen as #%d", PTR2UV(sv),
4461 		     ntohl(ltagval)));
4462 
4463 	    PUTMARK(SX_OBJECT);
4464 	    WRITE_I32(ltagval);
4465 	    return 0;
4466 	}
4467     }
4468 
4469     /*
4470      * Allocate a new tag and associate it with the address of the sv being
4471      * stored, before recursing...
4472      *
4473      * In order to avoid creating new SvIVs to hold the tagnum we just
4474      * cast the tagnum to an SV pointer and store that in the hash.  This
4475      * means that we must clean up the hash manually afterwards, but gives
4476      * us a 15% throughput increase.
4477      *
4478      */
4479 
4480     cxt->tagnum++;
4481 #ifdef USE_PTR_TABLE
4482     ptr_table_store(pseen, sv, INT2PTR(SV*, 1 + cxt->tagnum));
4483 #else
4484     if (!hv_store(hseen,
4485                   (char *) &sv, sizeof(sv), INT2PTR(SV*, cxt->tagnum), 0))
4486         return -1;
4487 #endif
4488 
4489     /*
4490      * Store 'sv' and everything beneath it, using appropriate routine.
4491      * Abort immediately if we get a non-zero status back.
4492      */
4493 
4494     type = sv_type(aTHX_ sv);
4495 
4496  undef_special_case:
4497     TRACEME(("storing 0x%" UVxf " tag #%d, type %d...",
4498              PTR2UV(sv), (int)cxt->tagnum, (int)type));
4499 
4500     if (SvOBJECT(sv)) {
4501         HV *pkg = SvSTASH(sv);
4502         ret = store_blessed(aTHX_ cxt, sv, type, pkg);
4503     } else
4504         ret = SV_STORE(type)(aTHX_ cxt, sv);
4505 
4506     TRACEME(("%s (stored 0x%" UVxf ", refcnt=%d, %s)",
4507              ret ? "FAILED" : "ok", PTR2UV(sv),
4508              (int)SvREFCNT(sv), sv_reftype(sv, FALSE)));
4509 
4510     return ret;
4511 }
4512 
4513 /*
4514  * magic_write
4515  *
4516  * Write magic number and system information into the file.
4517  * Layout is <magic> <network> [<len> <byteorder> <sizeof int> <sizeof long>
4518  * <sizeof ptr>] where <len> is the length of the byteorder hexa string.
4519  * All size and lengths are written as single characters here.
4520  *
4521  * Note that no byte ordering info is emitted when <network> is true, since
4522  * integers will be emitted in network order in that case.
4523  */
magic_write(pTHX_ stcxt_t * cxt)4524 static int magic_write(pTHX_ stcxt_t *cxt)
4525 {
4526     /*
4527      * Starting with 0.6, the "use_network_order" byte flag is also used to
4528      * indicate the version number of the binary image, encoded in the upper
4529      * bits. The bit 0 is always used to indicate network order.
4530      */
4531     /*
4532      * Starting with 0.7, a full byte is dedicated to the minor version of
4533      * the binary format, which is incremented only when new markers are
4534      * introduced, for instance, but when backward compatibility is preserved.
4535      */
4536 
4537     /* Make these at compile time.  The WRITE() macro is sufficiently complex
4538        that it saves about 200 bytes doing it this way and only using it
4539        once.  */
4540     static const unsigned char network_file_header[] = {
4541         MAGICSTR_BYTES,
4542         (STORABLE_BIN_MAJOR << 1) | 1,
4543         STORABLE_BIN_WRITE_MINOR
4544     };
4545     static const unsigned char file_header[] = {
4546         MAGICSTR_BYTES,
4547         (STORABLE_BIN_MAJOR << 1) | 0,
4548         STORABLE_BIN_WRITE_MINOR,
4549         /* sizeof the array includes the 0 byte at the end:  */
4550         (char) sizeof (byteorderstr) - 1,
4551         BYTEORDER_BYTES,
4552         (unsigned char) sizeof(int),
4553         (unsigned char) sizeof(long),
4554         (unsigned char) sizeof(char *),
4555         (unsigned char) sizeof(NV)
4556     };
4557 #ifdef USE_56_INTERWORK_KLUDGE
4558     static const unsigned char file_header_56[] = {
4559         MAGICSTR_BYTES,
4560         (STORABLE_BIN_MAJOR << 1) | 0,
4561         STORABLE_BIN_WRITE_MINOR,
4562         /* sizeof the array includes the 0 byte at the end:  */
4563         (char) sizeof (byteorderstr_56) - 1,
4564         BYTEORDER_BYTES_56,
4565         (unsigned char) sizeof(int),
4566         (unsigned char) sizeof(long),
4567         (unsigned char) sizeof(char *),
4568         (unsigned char) sizeof(NV)
4569     };
4570 #endif
4571     const unsigned char *header;
4572     SSize_t length;
4573 
4574     TRACEME(("magic_write on fd=%d", cxt->fio ? PerlIO_fileno(cxt->fio) : -1));
4575 
4576     if (cxt->netorder) {
4577         header = network_file_header;
4578         length = sizeof (network_file_header);
4579     } else {
4580 #ifdef USE_56_INTERWORK_KLUDGE
4581         if (SvTRUE(get_sv("Storable::interwork_56_64bit", GV_ADD))) {
4582             header = file_header_56;
4583             length = sizeof (file_header_56);
4584         } else
4585 #endif
4586             {
4587                 header = file_header;
4588                 length = sizeof (file_header);
4589             }
4590     }
4591 
4592     if (!cxt->fio) {
4593         /* sizeof the array includes the 0 byte at the end.  */
4594         header += sizeof (magicstr) - 1;
4595         length -= sizeof (magicstr) - 1;
4596     }
4597 
4598     WRITE( (unsigned char*) header, length);
4599 
4600     if (!cxt->netorder) {
4601         TRACEME(("ok (magic_write byteorder = 0x%lx [%d], I%d L%d P%d D%d)",
4602                  (unsigned long) BYTEORDER, (int) sizeof (byteorderstr) - 1,
4603                  (int) sizeof(int), (int) sizeof(long),
4604                  (int) sizeof(char *), (int) sizeof(NV)));
4605     }
4606     return 0;
4607 }
4608 
4609 /*
4610  * do_store
4611  *
4612  * Common code for store operations.
4613  *
4614  * When memory store is requested (f = NULL) and a non null SV* is given in
4615  * 'res', it is filled with a new SV created out of the memory buffer.
4616  *
4617  * It is required to provide a non-null 'res' when the operation type is not
4618  * dclone() and store() is performed to memory.
4619  */
do_store(pTHX_ PerlIO * f,SV * sv,int optype,int network_order,SV ** res)4620 static int do_store(pTHX_
4621 	PerlIO *f,
4622         SV *sv,
4623         int optype,
4624         int network_order,
4625         SV **res)
4626 {
4627     dSTCXT;
4628     int status;
4629 
4630     ASSERT(!(f == 0 && !(optype & ST_CLONE)) || res,
4631            ("must supply result SV pointer for real recursion to memory"));
4632 
4633     TRACEMED(("do_store (optype=%d, netorder=%d)",
4634              optype, network_order));
4635 
4636     optype |= ST_STORE;
4637 
4638     /*
4639      * Workaround for CROAK leak: if they enter with a "dirty" context,
4640      * free up memory for them now.
4641      */
4642 
4643     assert(cxt);
4644     if (cxt->s_dirty)
4645         clean_context(aTHX_ cxt);
4646 
4647     /*
4648      * Now that STORABLE_xxx hooks exist, it is possible that they try to
4649      * re-enter store() via the hooks.  We need to stack contexts.
4650      */
4651 
4652     if (cxt->entry)
4653         cxt = allocate_context(aTHX_ cxt);
4654 
4655     INIT_TRACEME;
4656 
4657     cxt->entry++;
4658 
4659     ASSERT(cxt->entry == 1, ("starting new recursion"));
4660     ASSERT(!cxt->s_dirty, ("clean context"));
4661 
4662     /*
4663      * Ensure sv is actually a reference. From perl, we called something
4664      * like:
4665      *       pstore(aTHX_ FILE, \@array);
4666      * so we must get the scalar value behind that reference.
4667      */
4668 
4669     if (!SvROK(sv))
4670         CROAK(("Not a reference"));
4671     sv = SvRV(sv);		/* So follow it to know what to store */
4672 
4673     /*
4674      * If we're going to store to memory, reset the buffer.
4675      */
4676 
4677     if (!f)
4678         MBUF_INIT(0);
4679 
4680     /*
4681      * Prepare context and emit headers.
4682      */
4683 
4684     init_store_context(aTHX_ cxt, f, optype, network_order);
4685 
4686     if (-1 == magic_write(aTHX_ cxt))	/* Emit magic and ILP info */
4687         return 0;			/* Error */
4688 
4689     /*
4690      * Recursively store object...
4691      */
4692 
4693     ASSERT(is_storing(aTHX), ("within store operation"));
4694 
4695     status = store(aTHX_ cxt, sv);	/* Just do it! */
4696 
4697     /*
4698      * If they asked for a memory store and they provided an SV pointer,
4699      * make an SV string out of the buffer and fill their pointer.
4700      *
4701      * When asking for ST_REAL, it's MANDATORY for the caller to provide
4702      * an SV, since context cleanup might free the buffer if we did recurse.
4703      * (unless caller is dclone(), which is aware of that).
4704      */
4705 
4706     if (!cxt->fio && res)
4707         *res = mbuf2sv(aTHX);
4708 
4709     TRACEME(("do_store returns %d", status));
4710 
4711     /*
4712      * Final cleanup.
4713      *
4714      * The "root" context is never freed, since it is meant to be always
4715      * handy for the common case where no recursion occurs at all (i.e.
4716      * we enter store() outside of any Storable code and leave it, period).
4717      * We know it's the "root" context because there's nothing stacked
4718      * underneath it.
4719      *
4720      * OPTIMIZATION:
4721      *
4722      * When deep cloning, we don't free the context: doing so would force
4723      * us to copy the data in the memory buffer.  Sicne we know we're
4724      * about to enter do_retrieve...
4725      */
4726 
4727     clean_store_context(aTHX_ cxt);
4728     if (cxt->prev && !(cxt->optype & ST_CLONE))
4729         free_context(aTHX_ cxt);
4730 
4731     return status == 0;
4732 }
4733 
4734 /***
4735  *** Memory stores.
4736  ***/
4737 
4738 /*
4739  * mbuf2sv
4740  *
4741  * Build a new SV out of the content of the internal memory buffer.
4742  */
mbuf2sv(pTHX)4743 static SV *mbuf2sv(pTHX)
4744 {
4745     dSTCXT;
4746 
4747     assert(cxt);
4748     return newSVpv(mbase, MBUF_SIZE());
4749 }
4750 
4751 /***
4752  *** Specific retrieve callbacks.
4753  ***/
4754 
4755 /*
4756  * retrieve_other
4757  *
4758  * Return an error via croak, since it is not possible that we get here
4759  * under normal conditions, when facing a file produced via pstore().
4760  */
retrieve_other(pTHX_ stcxt_t * cxt,const char * cname)4761 static SV *retrieve_other(pTHX_ stcxt_t *cxt, const char *cname)
4762 {
4763     PERL_UNUSED_ARG(cname);
4764     if (
4765         cxt->ver_major != STORABLE_BIN_MAJOR &&
4766         cxt->ver_minor != STORABLE_BIN_MINOR
4767 	) {
4768         CROAK(("Corrupted storable %s (binary v%d.%d), current is v%d.%d",
4769                cxt->fio ? "file" : "string",
4770                cxt->ver_major, cxt->ver_minor,
4771                STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
4772     } else {
4773         CROAK(("Corrupted storable %s (binary v%d.%d)",
4774                cxt->fio ? "file" : "string",
4775                cxt->ver_major, cxt->ver_minor));
4776     }
4777 
4778     return (SV *) 0;		/* Just in case */
4779 }
4780 
4781 /*
4782  * retrieve_idx_blessed
4783  *
4784  * Layout is SX_IX_BLESS <index> <object> with SX_IX_BLESS already read.
4785  * <index> can be coded on either 1 or 5 bytes.
4786  */
retrieve_idx_blessed(pTHX_ stcxt_t * cxt,const char * cname)4787 static SV *retrieve_idx_blessed(pTHX_ stcxt_t *cxt, const char *cname)
4788 {
4789     I32 idx;
4790     const char *classname;
4791     SV **sva;
4792     SV *sv;
4793 
4794     PERL_UNUSED_ARG(cname);
4795     TRACEME(("retrieve_idx_blessed (#%d)", (int)cxt->tagnum));
4796     ASSERT(!cname, ("no bless-into class given here, got %s", cname));
4797 
4798     GETMARK(idx);			/* Index coded on a single char? */
4799     if (idx & 0x80)
4800         RLEN(idx);
4801 
4802     /*
4803      * Fetch classname in 'aclass'
4804      */
4805 
4806     sva = av_fetch(cxt->aclass, idx, FALSE);
4807     if (!sva)
4808         CROAK(("Class name #%" IVdf " should have been seen already",
4809                (IV) idx));
4810 
4811     classname = SvPVX(*sva);	/* We know it's a PV, by construction */
4812 
4813     TRACEME(("class ID %d => %s", (int)idx, classname));
4814 
4815     /*
4816      * Retrieve object and bless it.
4817      */
4818 
4819     sv = retrieve(aTHX_ cxt, classname); /* First SV which is SEEN
4820                                             will be blessed */
4821 
4822     return sv;
4823 }
4824 
4825 /*
4826  * retrieve_blessed
4827  *
4828  * Layout is SX_BLESS <len> <classname> <object> with SX_BLESS already read.
4829  * <len> can be coded on either 1 or 5 bytes.
4830  */
retrieve_blessed(pTHX_ stcxt_t * cxt,const char * cname)4831 static SV *retrieve_blessed(pTHX_ stcxt_t *cxt, const char *cname)
4832 {
4833     U32 len;
4834     SV *sv;
4835     char buf[LG_BLESS + 1];		/* Avoid malloc() if possible */
4836     char *classname = buf;
4837     char *malloced_classname = NULL;
4838 
4839     PERL_UNUSED_ARG(cname);
4840     TRACEME(("retrieve_blessed (#%d)", (int)cxt->tagnum));
4841     ASSERT(!cname, ("no bless-into class given here, got %s", cname));
4842 
4843     /*
4844      * Decode class name length and read that name.
4845      *
4846      * Short classnames have two advantages: their length is stored on one
4847      * single byte, and the string can be read on the stack.
4848      */
4849 
4850     GETMARK(len);			/* Length coded on a single char? */
4851     if (len & 0x80) {
4852         RLEN(len);
4853         TRACEME(("** allocating %ld bytes for class name", (long)len+1));
4854         if (len > I32_MAX)
4855             CROAK(("Corrupted classname length %lu", (long)len));
4856         PL_nomemok = TRUE; /* handle error by ourselves */
4857         New(10003, classname, len+1, char);
4858         PL_nomemok = FALSE;
4859         if (!classname)
4860             CROAK(("Out of memory with len %ld", (long)len));
4861         PL_nomemok = FALSE;
4862         malloced_classname = classname;
4863     }
4864     SAFEPVREAD(classname, (I32)len, malloced_classname);
4865     classname[len] = '\0';		/* Mark string end */
4866 
4867     /*
4868      * It's a new classname, otherwise it would have been an SX_IX_BLESS.
4869      */
4870 
4871     TRACEME(("new class name \"%s\" will bear ID = %d", classname,
4872              (int)cxt->classnum));
4873 
4874     if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(classname, len))) {
4875         Safefree(malloced_classname);
4876         return (SV *) 0;
4877     }
4878 
4879     /*
4880      * Retrieve object and bless it.
4881      */
4882 
4883     sv = retrieve(aTHX_ cxt, classname); /* First SV which is SEEN will be blessed */
4884     if (malloced_classname)
4885         Safefree(malloced_classname);
4886 
4887     return sv;
4888 }
4889 
4890 /*
4891  * retrieve_hook
4892  *
4893  * Layout: SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
4894  * with leading mark already read, as usual.
4895  *
4896  * When recursion was involved during serialization of the object, there
4897  * is an unknown amount of serialized objects after the SX_HOOK mark.  Until
4898  * we reach a <flags> marker with the recursion bit cleared.
4899  *
4900  * If the first <flags> byte contains a type of SHT_EXTRA, then the real type
4901  * is held in the <extra> byte, and if the object is tied, the serialized
4902  * magic object comes at the very end:
4903  *
4904  *     SX_HOOK <flags> <extra> ... [<len3> <object-IDs>] <magic object>
4905  *
4906  * This means the STORABLE_thaw hook will NOT get a tied variable during its
4907  * processing (since we won't have seen the magic object by the time the hook
4908  * is called).  See comments below for why it was done that way.
4909  */
retrieve_hook_common(pTHX_ stcxt_t * cxt,const char * cname,int large)4910 static SV *retrieve_hook_common(pTHX_ stcxt_t *cxt, const char *cname, int large)
4911 {
4912     U32 len;
4913     char buf[LG_BLESS + 1];		/* Avoid malloc() if possible */
4914     char *classname = buf;
4915     unsigned int flags;
4916     STRLEN len2;
4917     SV *frozen;
4918     I32 len3 = 0;
4919     AV *av = 0;
4920     SV *hook;
4921     SV *sv;
4922     SV *rv;
4923     GV *attach;
4924     HV *stash;
4925     int obj_type;
4926     int clone = cxt->optype & ST_CLONE;
4927     char mtype = '\0';
4928     unsigned int extra_type = 0;
4929 #ifdef HAS_U64
4930     int has_large_oids = 0;
4931 #endif
4932 
4933     PERL_UNUSED_ARG(cname);
4934     TRACEME(("retrieve_hook (#%d)", (int)cxt->tagnum));
4935     ASSERT(!cname, ("no bless-into class given here, got %s", cname));
4936 
4937 #ifndef HAS_U64
4938     assert(!large);
4939     PERL_UNUSED_ARG(large);
4940 #endif
4941 
4942     /*
4943      * Read flags, which tell us about the type, and whether we need
4944      * to recurse.
4945      */
4946 
4947     GETMARK(flags);
4948 
4949     /*
4950      * Create the (empty) object, and mark it as seen.
4951      *
4952      * This must be done now, because tags are incremented, and during
4953      * serialization, the object tag was affected before recursion could
4954      * take place.
4955      */
4956 
4957     obj_type = flags & SHF_TYPE_MASK;
4958     switch (obj_type) {
4959     case SHT_SCALAR:
4960         sv = newSV(0);
4961         break;
4962     case SHT_ARRAY:
4963         sv = (SV *) newAV();
4964         break;
4965     case SHT_HASH:
4966         sv = (SV *) newHV();
4967         break;
4968     case SHT_EXTRA:
4969         /*
4970          * Read <extra> flag to know the type of the object.
4971          * Record associated magic type for later.
4972          */
4973         GETMARK(extra_type);
4974         switch (extra_type) {
4975         case SHT_TSCALAR:
4976             sv = newSV(0);
4977             mtype = 'q';
4978             break;
4979         case SHT_TARRAY:
4980             sv = (SV *) newAV();
4981             mtype = 'P';
4982             break;
4983         case SHT_THASH:
4984             sv = (SV *) newHV();
4985             mtype = 'P';
4986             break;
4987         default:
4988             return retrieve_other(aTHX_ cxt, 0);/* Let it croak */
4989         }
4990         break;
4991     default:
4992         return retrieve_other(aTHX_ cxt, 0);	/* Let it croak */
4993     }
4994     SEEN0_NN(sv, 0);				/* Don't bless yet */
4995 
4996     /*
4997      * Whilst flags tell us to recurse, do so.
4998      *
4999      * We don't need to remember the addresses returned by retrieval, because
5000      * all the references will be obtained through indirection via the object
5001      * tags in the object-ID list.
5002      *
5003      * We need to decrement the reference count for these objects
5004      * because, if the user doesn't save a reference to them in the hook,
5005      * they must be freed when this context is cleaned.
5006      */
5007 
5008     while (flags & SHF_NEED_RECURSE) {
5009         TRACEME(("retrieve_hook recursing..."));
5010         rv = retrieve(aTHX_ cxt, 0);
5011         if (!rv)
5012             return (SV *) 0;
5013         SvREFCNT_dec(rv);
5014         TRACEME(("retrieve_hook back with rv=0x%" UVxf,
5015                  PTR2UV(rv)));
5016         GETMARK(flags);
5017     }
5018 
5019     if (flags & SHF_IDX_CLASSNAME) {
5020         SV **sva;
5021         I32 idx;
5022 
5023         /*
5024          * Fetch index from 'aclass'
5025          */
5026 
5027         if (flags & SHF_LARGE_CLASSLEN)
5028             RLEN(idx);
5029         else
5030             GETMARK(idx);
5031 
5032         sva = av_fetch(cxt->aclass, idx, FALSE);
5033         if (!sva)
5034             CROAK(("Class name #%" IVdf " should have been seen already",
5035                    (IV) idx));
5036 
5037         classname = SvPVX(*sva);	/* We know it's a PV, by construction */
5038         TRACEME(("class ID %d => %s", (int)idx, classname));
5039 
5040     } else {
5041         /*
5042          * Decode class name length and read that name.
5043          *
5044          * NOTA BENE: even if the length is stored on one byte, we don't read
5045          * on the stack.  Just like retrieve_blessed(), we limit the name to
5046          * LG_BLESS bytes.  This is an arbitrary decision.
5047          */
5048         char *malloced_classname = NULL;
5049 
5050         if (flags & SHF_LARGE_CLASSLEN)
5051             RLEN(len);
5052         else
5053             GETMARK(len);
5054 
5055         TRACEME(("** allocating %ld bytes for class name", (long)len+1));
5056         if (len > I32_MAX) /* security */
5057             CROAK(("Corrupted classname length %lu", (long)len));
5058         else if (len > LG_BLESS) { /* security: signed len */
5059             PL_nomemok = TRUE;     /* handle error by ourselves */
5060             New(10003, classname, len+1, char);
5061             PL_nomemok = FALSE;
5062             if (!classname)
5063                 CROAK(("Out of memory with len %u", (unsigned)len+1));
5064             malloced_classname = classname;
5065         }
5066 
5067         SAFEPVREAD(classname, (I32)len, malloced_classname);
5068         classname[len] = '\0';		/* Mark string end */
5069 
5070         /*
5071          * Record new classname.
5072          */
5073 
5074         if (!av_store(cxt->aclass, cxt->classnum++,
5075                       newSVpvn(classname, len))) {
5076             Safefree(malloced_classname);
5077             return (SV *) 0;
5078         }
5079     }
5080 
5081     TRACEME(("class name: %s", classname));
5082 
5083     /*
5084      * Decode user-frozen string length and read it in an SV.
5085      *
5086      * For efficiency reasons, we read data directly into the SV buffer.
5087      * To understand that code, read retrieve_scalar()
5088      */
5089 
5090 #ifdef HAS_U64
5091     if (large) {
5092         READ_U64(len2);
5093     }
5094     else
5095 #endif
5096     if (flags & SHF_LARGE_STRLEN) {
5097         U32 len32;
5098         RLEN(len32);
5099         len2 = len32;
5100     }
5101     else
5102         GETMARK(len2);
5103 
5104     frozen = NEWSV(10002, len2 ? len2 : 1);
5105     if (len2) {
5106         SAFEREAD(SvPVX(frozen), len2, frozen);
5107     }
5108     SvCUR_set(frozen, len2);
5109     *SvEND(frozen) = '\0';
5110     (void) SvPOK_only(frozen);		/* Validates string pointer */
5111     if (cxt->s_tainted)			/* Is input source tainted? */
5112         SvTAINT(frozen);
5113 
5114     TRACEME(("frozen string: %d bytes", (int)len2));
5115 
5116     /*
5117      * Decode object-ID list length, if present.
5118      */
5119 
5120     if (flags & SHF_HAS_LIST) {
5121         if (flags & SHF_LARGE_LISTLEN) {
5122             RLEN(len3);
5123 	    if (len3 < 0) {
5124 #ifdef HAS_U64
5125 	        ++has_large_oids;
5126 		len3 = -len3;
5127 #else
5128 		CROAK(("Large object ids in hook data not supported on 32-bit platforms"));
5129 #endif
5130 
5131 	    }
5132 	}
5133 	else
5134             GETMARK(len3);
5135         if (len3) {
5136             av = newAV();
5137             av_extend(av, len3 + 1);	/* Leave room for [0] */
5138             AvFILLp(av) = len3;		/* About to be filled anyway */
5139         }
5140     }
5141 
5142     TRACEME(("has %d object IDs to link", (int)len3));
5143 
5144     /*
5145      * Read object-ID list into array.
5146      * Because we pre-extended it, we can cheat and fill it manually.
5147      *
5148      * We read object tags and we can convert them into SV* on the fly
5149      * because we know all the references listed in there (as tags)
5150      * have been already serialized, hence we have a valid correspondence
5151      * between each of those tags and the recreated SV.
5152      */
5153 
5154     if (av) {
5155         SV **ary = AvARRAY(av);
5156         int i;
5157         for (i = 1; i <= len3; i++) {	/* We leave [0] alone */
5158             ntag_t tag;
5159             SV **svh;
5160             SV *xsv;
5161 
5162 #ifdef HAS_U64
5163 	    if (has_large_oids) {
5164 		READ_U64(tag);
5165 	    }
5166 	    else {
5167 		U32 tmp;
5168 		READ_I32(tmp);
5169 		tag = ntohl(tmp);
5170 	    }
5171 #else
5172 	    READ_I32(tag);
5173 	    tag = ntohl(tag);
5174 #endif
5175 
5176             svh = av_fetch(cxt->aseen, tag, FALSE);
5177             if (!svh) {
5178                 if (tag == cxt->where_is_undef) {
5179                     /* av_fetch uses PL_sv_undef internally, hence this
5180                        somewhat gruesome hack. */
5181                     xsv = &PL_sv_undef;
5182                     svh = &xsv;
5183                 } else {
5184                     CROAK(("Object #%" IVdf
5185                            " should have been retrieved already",
5186                            (IV) tag));
5187                 }
5188             }
5189             xsv = *svh;
5190             ary[i] = SvREFCNT_inc(xsv);
5191         }
5192     }
5193 
5194     /*
5195      * Look up the STORABLE_attach hook
5196      * If blessing is disabled, just return what we've got.
5197      */
5198     if (!(cxt->flags & FLAG_BLESS_OK)) {
5199         TRACEME(("skipping bless because flags is %d", cxt->flags));
5200         return sv;
5201     }
5202 
5203     /*
5204      * Bless the object and look up the STORABLE_thaw hook.
5205      */
5206     stash = gv_stashpv(classname, GV_ADD);
5207 
5208     /* Handle attach case; again can't use pkg_can because it only
5209      * caches one method */
5210     attach = gv_fetchmethod_autoload(stash, "STORABLE_attach", FALSE);
5211     if (attach && isGV(attach)) {
5212         SV* attached;
5213         SV* attach_hook = newRV_inc((SV*) GvCV(attach));
5214 
5215         if (av)
5216             CROAK(("STORABLE_attach called with unexpected references"));
5217         av = newAV();
5218         av_extend(av, 1);
5219         AvFILLp(av) = 0;
5220         AvARRAY(av)[0] = SvREFCNT_inc(frozen);
5221         rv = newSVpv(classname, 0);
5222         attached = scalar_call(aTHX_ rv, attach_hook, clone, av, G_SCALAR);
5223         /* Free memory after a call */
5224         SvREFCNT_dec(rv);
5225         SvREFCNT_dec(frozen);
5226         av_undef(av);
5227         sv_free((SV *) av);
5228         SvREFCNT_dec(attach_hook);
5229         if (attached &&
5230             SvROK(attached) &&
5231             sv_derived_from(attached, classname)
5232             ) {
5233             UNSEE();
5234             /* refcnt of unneeded sv is 2 at this point
5235                (one from newHV, second from SEEN call) */
5236             SvREFCNT_dec(sv);
5237             SvREFCNT_dec(sv);
5238             /* we need to free RV but preserve value that RV point to */
5239             sv = SvRV(attached);
5240             SEEN0_NN(sv, 0);
5241             SvRV_set(attached, NULL);
5242             SvREFCNT_dec(attached);
5243             if (!(flags & SHF_IDX_CLASSNAME) && classname != buf)
5244                 Safefree(classname);
5245             return sv;
5246         }
5247         CROAK(("STORABLE_attach did not return a %s object", classname));
5248     }
5249 
5250     /*
5251      * Bless the object and look up the STORABLE_thaw hook.
5252      */
5253 
5254     BLESS(sv, stash);
5255 
5256     hook = pkg_can(aTHX_ cxt->hook, stash, "STORABLE_thaw");
5257     if (!hook) {
5258         /*
5259          * Hook not found.  Maybe they did not require the module where this
5260          * hook is defined yet?
5261          *
5262          * If the load below succeeds, we'll be able to find the hook.
5263          * Still, it only works reliably when each class is defined in a
5264          * file of its own.
5265          */
5266 
5267         TRACEME(("No STORABLE_thaw defined for objects of class %s", classname));
5268         TRACEME(("Going to load module '%s'", classname));
5269         load_module(PERL_LOADMOD_NOIMPORT, newSVpv(classname, 0), Nullsv);
5270 
5271         /*
5272          * We cache results of pkg_can, so we need to uncache before attempting
5273          * the lookup again.
5274          */
5275 
5276         pkg_uncache(aTHX_ cxt->hook, SvSTASH(sv), "STORABLE_thaw");
5277         hook = pkg_can(aTHX_ cxt->hook, SvSTASH(sv), "STORABLE_thaw");
5278 
5279         if (!hook)
5280             CROAK(("No STORABLE_thaw defined for objects of class %s "
5281                    "(even after a \"require %s;\")", classname, classname));
5282     }
5283 
5284     /*
5285      * If we don't have an 'av' yet, prepare one.
5286      * Then insert the frozen string as item [0].
5287      */
5288 
5289     if (!av) {
5290         av = newAV();
5291         av_extend(av, 1);
5292         AvFILLp(av) = 0;
5293     }
5294     AvARRAY(av)[0] = SvREFCNT_inc(frozen);
5295 
5296     /*
5297      * Call the hook as:
5298      *
5299      *   $object->STORABLE_thaw($cloning, $frozen, @refs);
5300      *
5301      * where $object is our blessed (empty) object, $cloning is a boolean
5302      * telling whether we're running a deep clone, $frozen is the frozen
5303      * string the user gave us in his serializing hook, and @refs, which may
5304      * be empty, is the list of extra references he returned along for us
5305      * to serialize.
5306      *
5307      * In effect, the hook is an alternate creation routine for the class,
5308      * the object itself being already created by the runtime.
5309      */
5310 
5311     TRACEME(("calling STORABLE_thaw on %s at 0x%" UVxf " (%" IVdf " args)",
5312              classname, PTR2UV(sv), (IV) AvFILLp(av) + 1));
5313 
5314     rv = newRV_inc(sv);
5315     (void) scalar_call(aTHX_ rv, hook, clone, av, G_SCALAR|G_DISCARD);
5316     SvREFCNT_dec(rv);
5317 
5318     /*
5319      * Final cleanup.
5320      */
5321 
5322     SvREFCNT_dec(frozen);
5323     av_undef(av);
5324     sv_free((SV *) av);
5325     if (!(flags & SHF_IDX_CLASSNAME) && classname != buf)
5326         Safefree(classname);
5327 
5328     /*
5329      * If we had an <extra> type, then the object was not as simple, and
5330      * we need to restore extra magic now.
5331      */
5332 
5333     if (!extra_type)
5334         return sv;
5335 
5336     TRACEME(("retrieving magic object for 0x%" UVxf "...", PTR2UV(sv)));
5337 
5338     rv = retrieve(aTHX_ cxt, 0);	/* Retrieve <magic object> */
5339 
5340     TRACEME(("restoring the magic object 0x%" UVxf " part of 0x%" UVxf,
5341              PTR2UV(rv), PTR2UV(sv)));
5342 
5343     switch (extra_type) {
5344     case SHT_TSCALAR:
5345         sv_upgrade(sv, SVt_PVMG);
5346         break;
5347     case SHT_TARRAY:
5348         sv_upgrade(sv, SVt_PVAV);
5349         AvREAL_off((AV *)sv);
5350         break;
5351     case SHT_THASH:
5352         sv_upgrade(sv, SVt_PVHV);
5353         break;
5354     default:
5355         CROAK(("Forgot to deal with extra type %d", extra_type));
5356         break;
5357     }
5358 
5359     /*
5360      * Adding the magic only now, well after the STORABLE_thaw hook was called
5361      * means the hook cannot know it deals with an object whose variable is
5362      * tied.  But this is happening when retrieving $o in the following case:
5363      *
5364      *	my %h;
5365      *  tie %h, 'FOO';
5366      *	my $o = bless \%h, 'BAR';
5367      *
5368      * The 'BAR' class is NOT the one where %h is tied into.  Therefore, as
5369      * far as the 'BAR' class is concerned, the fact that %h is not a REAL
5370      * hash but a tied one should not matter at all, and remain transparent.
5371      * This means the magic must be restored by Storable AFTER the hook is
5372      * called.
5373      *
5374      * That looks very reasonable to me, but then I've come up with this
5375      * after a bug report from David Nesting, who was trying to store such
5376      * an object and caused Storable to fail.  And unfortunately, it was
5377      * also the easiest way to retrofit support for blessed ref to tied objects
5378      * into the existing design.  -- RAM, 17/02/2001
5379      */
5380 
5381     sv_magic(sv, rv, mtype, (char *)NULL, 0);
5382     SvREFCNT_dec(rv);			/* Undo refcnt inc from sv_magic() */
5383 
5384     return sv;
5385 }
5386 
retrieve_hook(pTHX_ stcxt_t * cxt,const char * cname)5387 static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname) {
5388     return retrieve_hook_common(aTHX_ cxt, cname, FALSE);
5389 }
5390 
5391 /*
5392  * retrieve_ref
5393  *
5394  * Retrieve reference to some other scalar.
5395  * Layout is SX_REF <object>, with SX_REF already read.
5396  */
retrieve_ref(pTHX_ stcxt_t * cxt,const char * cname)5397 static SV *retrieve_ref(pTHX_ stcxt_t *cxt, const char *cname)
5398 {
5399     SV *rv;
5400     SV *sv;
5401     HV *stash;
5402 
5403     TRACEME(("retrieve_ref (#%d)", (int)cxt->tagnum));
5404 
5405     /*
5406      * We need to create the SV that holds the reference to the yet-to-retrieve
5407      * object now, so that we may record the address in the seen table.
5408      * Otherwise, if the object to retrieve references us, we won't be able
5409      * to resolve the SX_OBJECT we'll see at that point! Hence we cannot
5410      * do the retrieve first and use rv = newRV(sv) since it will be too late
5411      * for SEEN() recording.
5412      */
5413 
5414     rv = NEWSV(10002, 0);
5415     if (cname)
5416         stash = gv_stashpv(cname, GV_ADD);
5417     else
5418         stash = 0;
5419     SEEN_NN(rv, stash, 0);	/* Will return if rv is null */
5420     sv = retrieve(aTHX_ cxt, 0);/* Retrieve <object> */
5421     if (!sv)
5422         return (SV *) 0;	/* Failed */
5423 
5424     /*
5425      * WARNING: breaks RV encapsulation.
5426      *
5427      * Now for the tricky part. We have to upgrade our existing SV, so that
5428      * it is now an RV on sv... Again, we cheat by duplicating the code
5429      * held in newSVrv(), since we already got our SV from retrieve().
5430      *
5431      * We don't say:
5432      *
5433      *		SvRV(rv) = SvREFCNT_inc(sv);
5434      *
5435      * here because the reference count we got from retrieve() above is
5436      * already correct: if the object was retrieved from the file, then
5437      * its reference count is one. Otherwise, if it was retrieved via
5438      * an SX_OBJECT indication, a ref count increment was done.
5439      */
5440 
5441     if (cname) {
5442         /* No need to do anything, as rv will already be PVMG.  */
5443         assert (SvTYPE(rv) == SVt_RV || SvTYPE(rv) >= SVt_PV);
5444     } else {
5445         sv_upgrade(rv, SVt_RV);
5446     }
5447 
5448     SvRV_set(rv, sv);		/* $rv = \$sv */
5449     SvROK_on(rv);
5450     /*if (cxt->entry && ++cxt->ref_cnt > MAX_REF_CNT) {
5451         CROAK(("Max. recursion depth with nested refs exceeded"));
5452     }*/
5453 
5454     TRACEME(("ok (retrieve_ref at 0x%" UVxf ")", PTR2UV(rv)));
5455 
5456     return rv;
5457 }
5458 
5459 /*
5460  * retrieve_weakref
5461  *
5462  * Retrieve weak reference to some other scalar.
5463  * Layout is SX_WEAKREF <object>, with SX_WEAKREF already read.
5464  */
retrieve_weakref(pTHX_ stcxt_t * cxt,const char * cname)5465 static SV *retrieve_weakref(pTHX_ stcxt_t *cxt, const char *cname)
5466 {
5467     SV *sv;
5468 
5469     TRACEME(("retrieve_weakref (#%d)", (int)cxt->tagnum));
5470 
5471     sv = retrieve_ref(aTHX_ cxt, cname);
5472     if (sv) {
5473 #ifdef SvWEAKREF
5474         sv_rvweaken(sv);
5475 #else
5476         WEAKREF_CROAK();
5477 #endif
5478     }
5479     return sv;
5480 }
5481 
5482 /*
5483  * retrieve_overloaded
5484  *
5485  * Retrieve reference to some other scalar with overloading.
5486  * Layout is SX_OVERLOAD <object>, with SX_OVERLOAD already read.
5487  */
retrieve_overloaded(pTHX_ stcxt_t * cxt,const char * cname)5488 static SV *retrieve_overloaded(pTHX_ stcxt_t *cxt, const char *cname)
5489 {
5490     SV *rv;
5491     SV *sv;
5492     HV *stash;
5493 
5494     TRACEME(("retrieve_overloaded (#%d)", (int)cxt->tagnum));
5495 
5496     /*
5497      * Same code as retrieve_ref(), duplicated to avoid extra call.
5498      */
5499 
5500     rv = NEWSV(10002, 0);
5501     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5502     SEEN_NN(rv, stash, 0);	     /* Will return if rv is null */
5503     cxt->in_retrieve_overloaded = 1; /* so sv_bless doesn't call S_reset_amagic */
5504     sv = retrieve(aTHX_ cxt, 0);     /* Retrieve <object> */
5505     cxt->in_retrieve_overloaded = 0;
5506     if (!sv)
5507         return (SV *) 0;	/* Failed */
5508 
5509     /*
5510      * WARNING: breaks RV encapsulation.
5511      */
5512 
5513     SvUPGRADE(rv, SVt_RV);
5514     SvRV_set(rv, sv);		/* $rv = \$sv */
5515     SvROK_on(rv);
5516 
5517     /*
5518      * Restore overloading magic.
5519      */
5520 
5521     stash = SvTYPE(sv) ? (HV *) SvSTASH (sv) : 0;
5522     if (!stash) {
5523         CROAK(("Cannot restore overloading on %s(0x%" UVxf
5524                ") (package <unknown>)",
5525                sv_reftype(sv, FALSE),
5526                PTR2UV(sv)));
5527     }
5528     if (!Gv_AMG(stash)) {
5529         const char *package = HvNAME_get(stash);
5530         TRACEME(("No overloading defined for package %s", package));
5531         TRACEME(("Going to load module '%s'", package));
5532         load_module(PERL_LOADMOD_NOIMPORT, newSVpv(package, 0), Nullsv);
5533         if (!Gv_AMG(stash)) {
5534             CROAK(("Cannot restore overloading on %s(0x%" UVxf
5535                    ") (package %s) (even after a \"require %s;\")",
5536                    sv_reftype(sv, FALSE),
5537                    PTR2UV(sv),
5538                    package, package));
5539         }
5540     }
5541 
5542     SvAMAGIC_on(rv);
5543 
5544     TRACEME(("ok (retrieve_overloaded at 0x%" UVxf ")", PTR2UV(rv)));
5545 
5546     return rv;
5547 }
5548 
5549 /*
5550  * retrieve_weakoverloaded
5551  *
5552  * Retrieve weak overloaded reference to some other scalar.
5553  * Layout is SX_WEAKOVERLOADED <object>, with SX_WEAKOVERLOADED already read.
5554  */
retrieve_weakoverloaded(pTHX_ stcxt_t * cxt,const char * cname)5555 static SV *retrieve_weakoverloaded(pTHX_ stcxt_t *cxt, const char *cname)
5556 {
5557     SV *sv;
5558 
5559     TRACEME(("retrieve_weakoverloaded (#%d)", (int)cxt->tagnum));
5560 
5561     sv = retrieve_overloaded(aTHX_ cxt, cname);
5562     if (sv) {
5563 #ifdef SvWEAKREF
5564         sv_rvweaken(sv);
5565 #else
5566         WEAKREF_CROAK();
5567 #endif
5568     }
5569     return sv;
5570 }
5571 
5572 /*
5573  * retrieve_tied_array
5574  *
5575  * Retrieve tied array
5576  * Layout is SX_TIED_ARRAY <object>, with SX_TIED_ARRAY already read.
5577  */
retrieve_tied_array(pTHX_ stcxt_t * cxt,const char * cname)5578 static SV *retrieve_tied_array(pTHX_ stcxt_t *cxt, const char *cname)
5579 {
5580     SV *tv;
5581     SV *sv;
5582     HV *stash;
5583 
5584     TRACEME(("retrieve_tied_array (#%d)", (int)cxt->tagnum));
5585 
5586     if (!(cxt->flags & FLAG_TIE_OK)) {
5587         CROAK(("Tying is disabled."));
5588     }
5589 
5590     tv = NEWSV(10002, 0);
5591     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5592     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5593     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5594     if (!sv)
5595         return (SV *) 0;		/* Failed */
5596 
5597     sv_upgrade(tv, SVt_PVAV);
5598     sv_magic(tv, sv, 'P', (char *)NULL, 0);
5599     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5600 
5601     TRACEME(("ok (retrieve_tied_array at 0x%" UVxf ")", PTR2UV(tv)));
5602 
5603     return tv;
5604 }
5605 
5606 /*
5607  * retrieve_tied_hash
5608  *
5609  * Retrieve tied hash
5610  * Layout is SX_TIED_HASH <object>, with SX_TIED_HASH already read.
5611  */
retrieve_tied_hash(pTHX_ stcxt_t * cxt,const char * cname)5612 static SV *retrieve_tied_hash(pTHX_ stcxt_t *cxt, const char *cname)
5613 {
5614     SV *tv;
5615     SV *sv;
5616     HV *stash;
5617 
5618     TRACEME(("retrieve_tied_hash (#%d)", (int)cxt->tagnum));
5619 
5620     if (!(cxt->flags & FLAG_TIE_OK)) {
5621         CROAK(("Tying is disabled."));
5622     }
5623 
5624     tv = NEWSV(10002, 0);
5625     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5626     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5627     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5628     if (!sv)
5629         return (SV *) 0;		/* Failed */
5630 
5631     sv_upgrade(tv, SVt_PVHV);
5632     sv_magic(tv, sv, 'P', (char *)NULL, 0);
5633     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5634 
5635     TRACEME(("ok (retrieve_tied_hash at 0x%" UVxf ")", PTR2UV(tv)));
5636 
5637     return tv;
5638 }
5639 
5640 /*
5641  * retrieve_tied_scalar
5642  *
5643  * Retrieve tied scalar
5644  * Layout is SX_TIED_SCALAR <object>, with SX_TIED_SCALAR already read.
5645  */
retrieve_tied_scalar(pTHX_ stcxt_t * cxt,const char * cname)5646 static SV *retrieve_tied_scalar(pTHX_ stcxt_t *cxt, const char *cname)
5647 {
5648     SV *tv;
5649     SV *sv, *obj = NULL;
5650     HV *stash;
5651 
5652     TRACEME(("retrieve_tied_scalar (#%d)", (int)cxt->tagnum));
5653 
5654     if (!(cxt->flags & FLAG_TIE_OK)) {
5655         CROAK(("Tying is disabled."));
5656     }
5657 
5658     tv = NEWSV(10002, 0);
5659     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5660     SEEN_NN(tv, stash, 0);		/* Will return if rv is null */
5661     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5662     if (!sv) {
5663         return (SV *) 0;		/* Failed */
5664     }
5665     else if (SvTYPE(sv) != SVt_NULL) {
5666         obj = sv;
5667     }
5668 
5669     sv_upgrade(tv, SVt_PVMG);
5670     sv_magic(tv, obj, 'q', (char *)NULL, 0);
5671 
5672     if (obj) {
5673         /* Undo refcnt inc from sv_magic() */
5674         SvREFCNT_dec(obj);
5675     }
5676 
5677     TRACEME(("ok (retrieve_tied_scalar at 0x%" UVxf ")", PTR2UV(tv)));
5678 
5679     return tv;
5680 }
5681 
5682 /*
5683  * retrieve_tied_key
5684  *
5685  * Retrieve reference to value in a tied hash.
5686  * Layout is SX_TIED_KEY <object> <key>, with SX_TIED_KEY already read.
5687  */
retrieve_tied_key(pTHX_ stcxt_t * cxt,const char * cname)5688 static SV *retrieve_tied_key(pTHX_ stcxt_t *cxt, const char *cname)
5689 {
5690     SV *tv;
5691     SV *sv;
5692     SV *key;
5693     HV *stash;
5694 
5695     TRACEME(("retrieve_tied_key (#%d)", (int)cxt->tagnum));
5696 
5697     if (!(cxt->flags & FLAG_TIE_OK)) {
5698         CROAK(("Tying is disabled."));
5699     }
5700 
5701     tv = NEWSV(10002, 0);
5702     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5703     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5704     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5705     if (!sv)
5706         return (SV *) 0;		/* Failed */
5707 
5708     key = retrieve(aTHX_ cxt, 0);	/* Retrieve <key> */
5709     if (!key)
5710         return (SV *) 0;		/* Failed */
5711 
5712     sv_upgrade(tv, SVt_PVMG);
5713     sv_magic(tv, sv, 'p', (char *)key, HEf_SVKEY);
5714     SvREFCNT_dec(key);			/* Undo refcnt inc from sv_magic() */
5715     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5716 
5717     return tv;
5718 }
5719 
5720 /*
5721  * retrieve_tied_idx
5722  *
5723  * Retrieve reference to value in a tied array.
5724  * Layout is SX_TIED_IDX <object> <idx>, with SX_TIED_IDX already read.
5725  */
retrieve_tied_idx(pTHX_ stcxt_t * cxt,const char * cname)5726 static SV *retrieve_tied_idx(pTHX_ stcxt_t *cxt, const char *cname)
5727 {
5728     SV *tv;
5729     SV *sv;
5730     HV *stash;
5731     I32 idx;
5732 
5733     TRACEME(("retrieve_tied_idx (#%d)", (int)cxt->tagnum));
5734 
5735     if (!(cxt->flags & FLAG_TIE_OK)) {
5736         CROAK(("Tying is disabled."));
5737     }
5738 
5739     tv = NEWSV(10002, 0);
5740     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5741     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5742     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5743     if (!sv)
5744         return (SV *) 0;		/* Failed */
5745 
5746     RLEN(idx);				/* Retrieve <idx> */
5747 
5748     sv_upgrade(tv, SVt_PVMG);
5749     sv_magic(tv, sv, 'p', (char *)NULL, idx);
5750     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5751 
5752     return tv;
5753 }
5754 
5755 /*
5756  * get_lstring
5757  *
5758  * Helper to read a string
5759  */
get_lstring(pTHX_ stcxt_t * cxt,UV len,int isutf8,const char * cname)5760 static SV *get_lstring(pTHX_ stcxt_t *cxt, UV len, int isutf8, const char *cname)
5761 {
5762     SV *sv;
5763     HV *stash;
5764 
5765     TRACEME(("get_lstring (#%d), len = %" UVuf, (int)cxt->tagnum, len));
5766 
5767     /*
5768      * Allocate an empty scalar of the suitable length.
5769      */
5770 
5771     sv = NEWSV(10002, len);
5772     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5773     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
5774 
5775     if (len ==  0) {
5776         SvPVCLEAR(sv);
5777         return sv;
5778     }
5779 
5780     /*
5781      * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
5782      *
5783      * Now, for efficiency reasons, read data directly inside the SV buffer,
5784      * and perform the SV final settings directly by duplicating the final
5785      * work done by sv_setpv. Since we're going to allocate lots of scalars
5786      * this way, it's worth the hassle and risk.
5787      */
5788 
5789     SAFEREAD(SvPVX(sv), len, sv);
5790     SvCUR_set(sv, len);			/* Record C string length */
5791     *SvEND(sv) = '\0';			/* Ensure it's null terminated anyway */
5792     (void) SvPOK_only(sv);		/* Validate string pointer */
5793     if (cxt->s_tainted)			/* Is input source tainted? */
5794         SvTAINT(sv);			/* External data cannot be trusted */
5795 
5796     /* Check for CVE-215-1592 */
5797     if (cname && len == 13 && strEQc(cname, "CGITempFile")
5798         && strEQc(SvPVX(sv), "mt-config.cgi")) {
5799 #if defined(USE_CPERL) && defined(WARN_SECURITY)
5800         Perl_warn_security(aTHX_
5801             "Movable-Type CVE-2015-1592 Storable metasploit attack");
5802 #else
5803         Perl_warn(aTHX_
5804             "SECURITY: Movable-Type CVE-2015-1592 Storable metasploit attack");
5805 #endif
5806     }
5807 
5808     if (isutf8) {
5809         TRACEME(("large utf8 string len %" UVuf " '%s'", len,
5810                  len >= 2048 ? "<string too long>" : SvPVX(sv)));
5811 #ifdef HAS_UTF8_SCALARS
5812         SvUTF8_on(sv);
5813 #else
5814         if (cxt->use_bytes < 0)
5815             cxt->use_bytes
5816                 = (SvTRUE(get_sv("Storable::drop_utf8", GV_ADD))
5817                    ? 1 : 0);
5818         if (cxt->use_bytes == 0)
5819             UTF8_CROAK();
5820 #endif
5821     } else {
5822         TRACEME(("large string len %" UVuf " '%s'", len,
5823                  len >= 2048 ? "<string too long>" : SvPVX(sv)));
5824     }
5825     TRACEME(("ok (get_lstring at 0x%" UVxf ")", PTR2UV(sv)));
5826 
5827     return sv;
5828 }
5829 
5830 /*
5831  * retrieve_lscalar
5832  *
5833  * Retrieve defined long (string) scalar.
5834  *
5835  * Layout is SX_LSCALAR <length> <data>, with SX_LSCALAR already read.
5836  * The scalar is "long" in that <length> is larger than LG_SCALAR so it
5837  * was not stored on a single byte, but in 4 bytes. For strings longer than
5838  * 4 byte (>2GB) see retrieve_lobject.
5839  */
retrieve_lscalar(pTHX_ stcxt_t * cxt,const char * cname)5840 static SV *retrieve_lscalar(pTHX_ stcxt_t *cxt, const char *cname)
5841 {
5842     U32 len;
5843     RLEN(len);
5844     return get_lstring(aTHX_ cxt, len, 0, cname);
5845 }
5846 
5847 /*
5848  * retrieve_scalar
5849  *
5850  * Retrieve defined short (string) scalar.
5851  *
5852  * Layout is SX_SCALAR <length> <data>, with SX_SCALAR already read.
5853  * The scalar is "short" so <length> is single byte. If it is 0, there
5854  * is no <data> section.
5855  */
retrieve_scalar(pTHX_ stcxt_t * cxt,const char * cname)5856 static SV *retrieve_scalar(pTHX_ stcxt_t *cxt, const char *cname)
5857 {
5858     int len;
5859     /*SV *sv;
5860       HV *stash;*/
5861 
5862     GETMARK(len);
5863     TRACEME(("retrieve_scalar (#%d), len = %d", (int)cxt->tagnum, len));
5864     return get_lstring(aTHX_ cxt, (UV)len, 0, cname);
5865 }
5866 
5867 /*
5868  * retrieve_utf8str
5869  *
5870  * Like retrieve_scalar(), but tag result as utf8.
5871  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
5872  */
retrieve_utf8str(pTHX_ stcxt_t * cxt,const char * cname)5873 static SV *retrieve_utf8str(pTHX_ stcxt_t *cxt, const char *cname)
5874 {
5875     int len;
5876     /*SV *sv;*/
5877 
5878     TRACEME(("retrieve_utf8str"));
5879     GETMARK(len);
5880     return get_lstring(aTHX_ cxt, (UV)len, 1, cname);
5881 }
5882 
5883 /*
5884  * retrieve_lutf8str
5885  *
5886  * Like retrieve_lscalar(), but tag result as utf8.
5887  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
5888  */
retrieve_lutf8str(pTHX_ stcxt_t * cxt,const char * cname)5889 static SV *retrieve_lutf8str(pTHX_ stcxt_t *cxt, const char *cname)
5890 {
5891     U32 len;
5892 
5893     TRACEME(("retrieve_lutf8str"));
5894 
5895     RLEN(len);
5896     return get_lstring(aTHX_ cxt, (UV)len, 1, cname);
5897 }
5898 
5899 /*
5900  * retrieve_vstring
5901  *
5902  * Retrieve a vstring, and then retrieve the stringy scalar following it,
5903  * attaching the vstring to the scalar via magic.
5904  * If we're retrieving a vstring in a perl without vstring magic, croaks.
5905  *
5906  * The vstring layout mirrors an SX_SCALAR string:
5907  * SX_VSTRING <length> <data> with SX_VSTRING already read.
5908  */
retrieve_vstring(pTHX_ stcxt_t * cxt,const char * cname)5909 static SV *retrieve_vstring(pTHX_ stcxt_t *cxt, const char *cname)
5910 {
5911 #ifdef SvVOK
5912     char s[256];
5913     int len;
5914     SV *sv;
5915 
5916     GETMARK(len);
5917     TRACEME(("retrieve_vstring (#%d), len = %d", (int)cxt->tagnum, len));
5918 
5919     READ(s, len);
5920     sv = retrieve(aTHX_ cxt, cname);
5921     if (!sv)
5922         return (SV *) 0;		/* Failed */
5923     sv_magic(sv,NULL,PERL_MAGIC_vstring,s,len);
5924     /* 5.10.0 and earlier seem to need this */
5925     SvRMAGICAL_on(sv);
5926 
5927     TRACEME(("ok (retrieve_vstring at 0x%" UVxf ")", PTR2UV(sv)));
5928     return sv;
5929 #else
5930     VSTRING_CROAK();
5931     return Nullsv;
5932 #endif
5933 }
5934 
5935 /*
5936  * retrieve_lvstring
5937  *
5938  * Like retrieve_vstring, but for longer vstrings.
5939  */
retrieve_lvstring(pTHX_ stcxt_t * cxt,const char * cname)5940 static SV *retrieve_lvstring(pTHX_ stcxt_t *cxt, const char *cname)
5941 {
5942 #ifdef SvVOK
5943     char *s;
5944     U32 len;
5945     SV *sv;
5946 
5947     RLEN(len);
5948     TRACEME(("retrieve_lvstring (#%d), len = %" UVuf,
5949              (int)cxt->tagnum, (UV)len));
5950 
5951     /* Since we'll no longer produce such large vstrings, reject them
5952        here too.
5953     */
5954     if (len >= I32_MAX) {
5955         CROAK(("vstring too large to fetch"));
5956     }
5957 
5958     New(10003, s, len+1, char);
5959     SAFEPVREAD(s, (I32)len, s);
5960 
5961     sv = retrieve(aTHX_ cxt, cname);
5962     if (!sv) {
5963         Safefree(s);
5964         return (SV *) 0;		/* Failed */
5965     }
5966     sv_magic(sv,NULL,PERL_MAGIC_vstring,s,len);
5967     /* 5.10.0 and earlier seem to need this */
5968     SvRMAGICAL_on(sv);
5969 
5970     Safefree(s);
5971 
5972     TRACEME(("ok (retrieve_lvstring at 0x%" UVxf ")", PTR2UV(sv)));
5973     return sv;
5974 #else
5975     VSTRING_CROAK();
5976     return Nullsv;
5977 #endif
5978 }
5979 
5980 /*
5981  * retrieve_integer
5982  *
5983  * Retrieve defined integer.
5984  * Layout is SX_INTEGER <data>, whith SX_INTEGER already read.
5985  */
retrieve_integer(pTHX_ stcxt_t * cxt,const char * cname)5986 static SV *retrieve_integer(pTHX_ stcxt_t *cxt, const char *cname)
5987 {
5988     SV *sv;
5989     HV *stash;
5990     IV iv;
5991 
5992     TRACEME(("retrieve_integer (#%d)", (int)cxt->tagnum));
5993 
5994     READ(&iv, sizeof(iv));
5995     sv = newSViv(iv);
5996     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5997     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
5998 
5999     TRACEME(("integer %" IVdf, iv));
6000     TRACEME(("ok (retrieve_integer at 0x%" UVxf ")", PTR2UV(sv)));
6001 
6002     return sv;
6003 }
6004 
6005 /*
6006  * retrieve_lobject
6007  *
6008  * Retrieve overlong scalar, array or hash.
6009  * Layout is SX_LOBJECT type U64_len ...
6010  */
retrieve_lobject(pTHX_ stcxt_t * cxt,const char * cname)6011 static SV *retrieve_lobject(pTHX_ stcxt_t *cxt, const char *cname)
6012 {
6013     int type;
6014 #ifdef HAS_U64
6015     UV  len;
6016     SV *sv;
6017     int hash_flags = 0;
6018 #endif
6019 
6020     TRACEME(("retrieve_lobject (#%d)", (int)cxt->tagnum));
6021 
6022     GETMARK(type);
6023     TRACEME(("object type %d", type));
6024 #ifdef HAS_U64
6025 
6026     if (type == SX_FLAG_HASH) {
6027 	/* we write the flags immediately after the op.  I could have
6028 	   changed the writer, but this may allow someone to recover
6029 	   data they're already frozen, though such a very large hash
6030 	   seems unlikely.
6031 	*/
6032 	GETMARK(hash_flags);
6033     }
6034     else if (type == SX_HOOK) {
6035         return retrieve_hook_common(aTHX_ cxt, cname, TRUE);
6036     }
6037 
6038     READ_U64(len);
6039     TRACEME(("wlen %" UVuf, len));
6040     switch (type) {
6041     case SX_OBJECT:
6042         {
6043             /* not a large object, just a large index */
6044             SV **svh = av_fetch(cxt->aseen, len, FALSE);
6045             if (!svh)
6046                 CROAK(("Object #%" UVuf " should have been retrieved already",
6047                       len));
6048             sv = *svh;
6049             TRACEME(("had retrieved #%" UVuf " at 0x%" UVxf, len, PTR2UV(sv)));
6050             SvREFCNT_inc(sv);
6051         }
6052         break;
6053     case SX_LSCALAR:
6054         sv = get_lstring(aTHX_ cxt, len, 0, cname);
6055         break;
6056     case SX_LUTF8STR:
6057         sv = get_lstring(aTHX_ cxt, len, 1, cname);
6058         break;
6059     case SX_ARRAY:
6060         sv = get_larray(aTHX_ cxt, len, cname);
6061         break;
6062     /* <5.12 you could store larger hashes, but cannot iterate over them.
6063        So we reject them, it's a bug. */
6064     case SX_FLAG_HASH:
6065         sv = get_lhash(aTHX_ cxt, len, hash_flags, cname);
6066         break;
6067     case SX_HASH:
6068         sv = get_lhash(aTHX_ cxt, len, 0, cname);
6069         break;
6070     default:
6071         CROAK(("Unexpected type %d in retrieve_lobject\n", type));
6072     }
6073 
6074     TRACEME(("ok (retrieve_lobject at 0x%" UVxf ")", PTR2UV(sv)));
6075     return sv;
6076 #else
6077     PERL_UNUSED_ARG(cname);
6078 
6079     /* previously this (brokenly) checked the length value and only failed if
6080        the length was over 4G.
6081        Since this op should only occur with objects over 4GB (or 2GB) we can just
6082        reject it.
6083     */
6084     CROAK(("Invalid large object op for this 32bit system"));
6085 #endif
6086 }
6087 
6088 /*
6089  * retrieve_netint
6090  *
6091  * Retrieve defined integer in network order.
6092  * Layout is SX_NETINT <data>, whith SX_NETINT already read.
6093  */
retrieve_netint(pTHX_ stcxt_t * cxt,const char * cname)6094 static SV *retrieve_netint(pTHX_ stcxt_t *cxt, const char *cname)
6095 {
6096     SV *sv;
6097     HV *stash;
6098     I32 iv;
6099 
6100     TRACEME(("retrieve_netint (#%d)", (int)cxt->tagnum));
6101 
6102     READ_I32(iv);
6103 #ifdef HAS_NTOHL
6104     sv = newSViv((int) ntohl(iv));
6105     TRACEME(("network integer %d", (int) ntohl(iv)));
6106 #else
6107     sv = newSViv(iv);
6108     TRACEME(("network integer (as-is) %d", iv));
6109 #endif
6110     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6111     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
6112 
6113     TRACEME(("ok (retrieve_netint at 0x%" UVxf ")", PTR2UV(sv)));
6114 
6115     return sv;
6116 }
6117 
6118 /*
6119  * retrieve_double
6120  *
6121  * Retrieve defined double.
6122  * Layout is SX_DOUBLE <data>, whith SX_DOUBLE already read.
6123  */
retrieve_double(pTHX_ stcxt_t * cxt,const char * cname)6124 static SV *retrieve_double(pTHX_ stcxt_t *cxt, const char *cname)
6125 {
6126     SV *sv;
6127     HV *stash;
6128     NV nv;
6129 
6130     TRACEME(("retrieve_double (#%d)", (int)cxt->tagnum));
6131 
6132     READ(&nv, sizeof(nv));
6133     sv = newSVnv(nv);
6134     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6135     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
6136 
6137     TRACEME(("double %" NVff, nv));
6138     TRACEME(("ok (retrieve_double at 0x%" UVxf ")", PTR2UV(sv)));
6139 
6140     return sv;
6141 }
6142 
6143 /*
6144  * retrieve_byte
6145  *
6146  * Retrieve defined byte (small integer within the [-128, +127] range).
6147  * Layout is SX_BYTE <data>, whith SX_BYTE already read.
6148  */
retrieve_byte(pTHX_ stcxt_t * cxt,const char * cname)6149 static SV *retrieve_byte(pTHX_ stcxt_t *cxt, const char *cname)
6150 {
6151     SV *sv;
6152     HV *stash;
6153     int siv;
6154 #ifdef _MSC_VER
6155     /* MSVC 2017 doesn't handle the AIX workaround well */
6156     int tmp;
6157 #else
6158     signed char tmp;	/* Workaround for AIX cc bug --H.Merijn Brand */
6159 #endif
6160 
6161     TRACEME(("retrieve_byte (#%d)", (int)cxt->tagnum));
6162 
6163     GETMARK(siv);
6164     TRACEME(("small integer read as %d", (unsigned char) siv));
6165     tmp = (unsigned char) siv - 128;
6166     sv = newSViv(tmp);
6167     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6168     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
6169 
6170     TRACEME(("byte %d", tmp));
6171     TRACEME(("ok (retrieve_byte at 0x%" UVxf ")", PTR2UV(sv)));
6172 
6173     return sv;
6174 }
6175 
6176 /*
6177  * retrieve_undef
6178  *
6179  * Return the undefined value.
6180  */
retrieve_undef(pTHX_ stcxt_t * cxt,const char * cname)6181 static SV *retrieve_undef(pTHX_ stcxt_t *cxt, const char *cname)
6182 {
6183     SV *sv;
6184     HV *stash;
6185 
6186     TRACEME(("retrieve_undef"));
6187 
6188     sv = newSV(0);
6189     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6190     SEEN_NN(sv, stash, 0);
6191 
6192     return sv;
6193 }
6194 
6195 /*
6196  * retrieve_sv_undef
6197  *
6198  * Return the immortal undefined value.
6199  */
retrieve_sv_undef(pTHX_ stcxt_t * cxt,const char * cname)6200 static SV *retrieve_sv_undef(pTHX_ stcxt_t *cxt, const char *cname)
6201 {
6202     SV *sv = &PL_sv_undef;
6203     HV *stash;
6204 
6205     TRACEME(("retrieve_sv_undef"));
6206 
6207     /* Special case PL_sv_undef, as av_fetch uses it internally to mark
6208        deleted elements, and will return NULL (fetch failed) whenever it
6209        is fetched.  */
6210     if (cxt->where_is_undef == UNSET_NTAG_T) {
6211         cxt->where_is_undef = cxt->tagnum;
6212     }
6213     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6214     SEEN_NN(sv, stash, 1);
6215     return sv;
6216 }
6217 
6218 /*
6219  * retrieve_sv_yes
6220  *
6221  * Return the immortal yes value.
6222  */
retrieve_sv_yes(pTHX_ stcxt_t * cxt,const char * cname)6223 static SV *retrieve_sv_yes(pTHX_ stcxt_t *cxt, const char *cname)
6224 {
6225     SV *sv = &PL_sv_yes;
6226     HV *stash;
6227 
6228     TRACEME(("retrieve_sv_yes"));
6229 
6230     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6231     SEEN_NN(sv, stash, 1);
6232     return sv;
6233 }
6234 
6235 /*
6236  * retrieve_sv_no
6237  *
6238  * Return the immortal no value.
6239  */
retrieve_sv_no(pTHX_ stcxt_t * cxt,const char * cname)6240 static SV *retrieve_sv_no(pTHX_ stcxt_t *cxt, const char *cname)
6241 {
6242     SV *sv = &PL_sv_no;
6243     HV *stash;
6244 
6245     TRACEME(("retrieve_sv_no"));
6246 
6247     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6248     SEEN_NN(sv, stash, 1);
6249     return sv;
6250 }
6251 
6252 /*
6253  * retrieve_svundef_elem
6254  *
6255  * Return &PL_sv_placeholder, representing &PL_sv_undef in an array.  This
6256  * is a bit of a hack, but we already use SX_SV_UNDEF to mean a nonexistent
6257  * element, for historical reasons.
6258  */
retrieve_svundef_elem(pTHX_ stcxt_t * cxt,const char * cname)6259 static SV *retrieve_svundef_elem(pTHX_ stcxt_t *cxt, const char *cname)
6260 {
6261     TRACEME(("retrieve_svundef_elem"));
6262 
6263     /* SEEN reads the contents of its SV argument, which we are not
6264        supposed to do with &PL_sv_placeholder. */
6265     SEEN_NN(&PL_sv_undef, cname, 1);
6266 
6267     return &PL_sv_placeholder;
6268 }
6269 
6270 /*
6271  * retrieve_array
6272  *
6273  * Retrieve a whole array.
6274  * Layout is SX_ARRAY <size> followed by each item, in increasing index order.
6275  * Each item is stored as <object>.
6276  *
6277  * When we come here, SX_ARRAY has been read already.
6278  */
retrieve_array(pTHX_ stcxt_t * cxt,const char * cname)6279 static SV *retrieve_array(pTHX_ stcxt_t *cxt, const char *cname)
6280 {
6281     I32 len, i;
6282     AV *av;
6283     SV *sv;
6284     HV *stash;
6285     bool seen_null = FALSE;
6286 
6287     TRACEME(("retrieve_array (#%d)", (int)cxt->tagnum));
6288 
6289     /*
6290      * Read length, and allocate array, then pre-extend it.
6291      */
6292 
6293     RLEN(len);
6294     TRACEME(("size = %d", (int)len));
6295     av = newAV();
6296     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6297     SEEN_NN(av, stash, 0); /* Will return if array not allocated nicely */
6298     if (len)
6299         av_extend(av, len);
6300     else
6301         return (SV *) av;	/* No data follow if array is empty */
6302 
6303     /*
6304      * Now get each item in turn...
6305      */
6306 
6307     for (i = 0; i < len; i++) {
6308         TRACEME(("(#%d) item", (int)i));
6309         sv = retrieve(aTHX_ cxt, 0);	/* Retrieve item */
6310         if (!sv)
6311             return (SV *) 0;
6312         if (sv == &PL_sv_undef) {
6313             seen_null = TRUE;
6314             continue;
6315         }
6316         if (sv == &PL_sv_placeholder)
6317             sv = &PL_sv_undef;
6318         if (av_store(av, i, sv) == 0)
6319             return (SV *) 0;
6320     }
6321     if (seen_null) av_fill(av, len-1);
6322 
6323     TRACEME(("ok (retrieve_array at 0x%" UVxf ")", PTR2UV(av)));
6324 
6325     return (SV *) av;
6326 }
6327 
6328 #ifdef HAS_U64
6329 
6330 /* internal method with len already read */
6331 
get_larray(pTHX_ stcxt_t * cxt,UV len,const char * cname)6332 static SV *get_larray(pTHX_ stcxt_t *cxt, UV len, const char *cname)
6333 {
6334     UV i;
6335     AV *av;
6336     SV *sv;
6337     HV *stash;
6338     bool seen_null = FALSE;
6339 
6340     TRACEME(("get_larray (#%d) %lu", (int)cxt->tagnum, (unsigned long)len));
6341 
6342     /*
6343      * allocate array, then pre-extend it.
6344      */
6345 
6346     av = newAV();
6347     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6348     SEEN_NN(av, stash, 0); /* Will return if array not allocated nicely */
6349     assert(len);
6350     av_extend(av, len);
6351 
6352     /*
6353      * Now get each item in turn...
6354      */
6355 
6356     for (i = 0; i < len; i++) {
6357         TRACEME(("(#%d) item", (int)i));
6358         sv = retrieve(aTHX_ cxt, 0);		/* Retrieve item */
6359         if (!sv)
6360             return (SV *) 0;
6361         if (sv == &PL_sv_undef) {
6362             seen_null = TRUE;
6363             continue;
6364         }
6365         if (sv == &PL_sv_placeholder)
6366             sv = &PL_sv_undef;
6367         if (av_store(av, i, sv) == 0)
6368             return (SV *) 0;
6369     }
6370     if (seen_null) av_fill(av, len-1);
6371 
6372     TRACEME(("ok (get_larray at 0x%" UVxf ")", PTR2UV(av)));
6373 
6374     return (SV *) av;
6375 }
6376 
6377 /*
6378  * get_lhash
6379  *
6380  * Retrieve a overlong hash table.
6381  * <len> is already read. What follows is each key/value pair, in random order.
6382  * Keys are stored as <length> <data>, the <data> section being omitted
6383  * if length is 0.
6384  * Values are stored as <object>.
6385  *
6386  */
get_lhash(pTHX_ stcxt_t * cxt,UV len,int hash_flags,const char * cname)6387 static SV *get_lhash(pTHX_ stcxt_t *cxt, UV len, int hash_flags, const char *cname)
6388 {
6389     UV size;
6390     UV i;
6391     HV *hv;
6392     SV *sv;
6393     HV *stash;
6394 
6395     TRACEME(("get_lhash (#%d)", (int)cxt->tagnum));
6396 
6397 #ifdef HAS_RESTRICTED_HASHES
6398     PERL_UNUSED_ARG(hash_flags);
6399 #else
6400     if (hash_flags & SHV_RESTRICTED) {
6401         if (cxt->derestrict < 0)
6402             cxt->derestrict = (SvTRUE
6403                 (get_sv("Storable::downgrade_restricted", GV_ADD))
6404                                ? 1 : 0);
6405         if (cxt->derestrict == 0)
6406             RESTRICTED_HASH_CROAK();
6407     }
6408 #endif
6409 
6410     TRACEME(("size = %lu", (unsigned long)len));
6411     hv = newHV();
6412     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6413     SEEN_NN(hv, stash, 0);	/* Will return if table not allocated properly */
6414     if (len == 0)
6415         return (SV *) hv;	/* No data follow if table empty */
6416     TRACEME(("split %lu", (unsigned long)len+1));
6417     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6418 
6419     /*
6420      * Now get each key/value pair in turn...
6421      */
6422 
6423     for (i = 0; i < len; i++) {
6424         /*
6425          * Get value first.
6426          */
6427 
6428         TRACEME(("(#%d) value", (int)i));
6429         sv = retrieve(aTHX_ cxt, 0);
6430         if (!sv)
6431             return (SV *) 0;
6432 
6433         /*
6434          * Get key.
6435          * Since we're reading into kbuf, we must ensure we're not
6436          * recursing between the read and the hv_store() where it's used.
6437          * Hence the key comes after the value.
6438          */
6439 
6440         RLEN(size);		/* Get key size */
6441         KBUFCHK((STRLEN)size);	/* Grow hash key read pool if needed */
6442         if (size)
6443             READ(kbuf, size);
6444         kbuf[size] = '\0';	/* Mark string end, just in case */
6445         TRACEME(("(#%d) key '%s'", (int)i, kbuf));
6446 
6447         /*
6448          * Enter key/value pair into hash table.
6449          */
6450 
6451         if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
6452             return (SV *) 0;
6453     }
6454 
6455     TRACEME(("ok (get_lhash at 0x%" UVxf ")", PTR2UV(hv)));
6456     return (SV *) hv;
6457 }
6458 #endif
6459 
6460 /*
6461  * retrieve_hash
6462  *
6463  * Retrieve a whole hash table.
6464  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
6465  * Keys are stored as <length> <data>, the <data> section being omitted
6466  * if length is 0.
6467  * Values are stored as <object>.
6468  *
6469  * When we come here, SX_HASH has been read already.
6470  */
retrieve_hash(pTHX_ stcxt_t * cxt,const char * cname)6471 static SV *retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname)
6472 {
6473     I32 len;
6474     I32 size;
6475     I32 i;
6476     HV *hv;
6477     SV *sv;
6478     HV *stash;
6479 
6480     TRACEME(("retrieve_hash (#%d)", (int)cxt->tagnum));
6481 
6482     /*
6483      * Read length, allocate table.
6484      */
6485 
6486     RLEN(len);
6487     TRACEME(("size = %d", (int)len));
6488     hv = newHV();
6489     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6490     SEEN_NN(hv, stash, 0);	/* Will return if table not allocated properly */
6491     if (len == 0)
6492         return (SV *) hv;	/* No data follow if table empty */
6493     TRACEME(("split %d", (int)len+1));
6494     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6495 
6496     /*
6497      * Now get each key/value pair in turn...
6498      */
6499 
6500     for (i = 0; i < len; i++) {
6501         /*
6502          * Get value first.
6503          */
6504 
6505         TRACEME(("(#%d) value", (int)i));
6506         sv = retrieve(aTHX_ cxt, 0);
6507         if (!sv)
6508             return (SV *) 0;
6509 
6510         /*
6511          * Get key.
6512          * Since we're reading into kbuf, we must ensure we're not
6513          * recursing between the read and the hv_store() where it's used.
6514          * Hence the key comes after the value.
6515          */
6516 
6517         RLEN(size);		/* Get key size */
6518         KBUFCHK((STRLEN)size);	/* Grow hash key read pool if needed */
6519         if (size)
6520             READ(kbuf, size);
6521         kbuf[size] = '\0';	/* Mark string end, just in case */
6522         TRACEME(("(#%d) key '%s'", (int)i, kbuf));
6523 
6524         /*
6525          * Enter key/value pair into hash table.
6526          */
6527 
6528         if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
6529             return (SV *) 0;
6530     }
6531 
6532     TRACEME(("ok (retrieve_hash at 0x%" UVxf ")", PTR2UV(hv)));
6533 
6534     return (SV *) hv;
6535 }
6536 
6537 /*
6538  * retrieve_hash
6539  *
6540  * Retrieve a whole hash table.
6541  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
6542  * Keys are stored as <length> <data>, the <data> section being omitted
6543  * if length is 0.
6544  * Values are stored as <object>.
6545  *
6546  * When we come here, SX_HASH has been read already.
6547  */
retrieve_flag_hash(pTHX_ stcxt_t * cxt,const char * cname)6548 static SV *retrieve_flag_hash(pTHX_ stcxt_t *cxt, const char *cname)
6549 {
6550     dVAR;
6551     I32 len;
6552     I32 size;
6553     I32 i;
6554     HV *hv;
6555     SV *sv;
6556     HV *stash;
6557     int hash_flags;
6558 
6559     GETMARK(hash_flags);
6560     TRACEME(("retrieve_flag_hash (#%d)", (int)cxt->tagnum));
6561     /*
6562      * Read length, allocate table.
6563      */
6564 
6565 #ifndef HAS_RESTRICTED_HASHES
6566     if (hash_flags & SHV_RESTRICTED) {
6567         if (cxt->derestrict < 0)
6568             cxt->derestrict = (SvTRUE
6569                 (get_sv("Storable::downgrade_restricted", GV_ADD))
6570                                ? 1 : 0);
6571         if (cxt->derestrict == 0)
6572             RESTRICTED_HASH_CROAK();
6573     }
6574 #endif
6575 
6576     RLEN(len);
6577     TRACEME(("size = %d, flags = %d", (int)len, hash_flags));
6578     hv = newHV();
6579     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6580     SEEN_NN(hv, stash, 0);	/* Will return if table not allocated properly */
6581     if (len == 0)
6582         return (SV *) hv;	/* No data follow if table empty */
6583     TRACEME(("split %d", (int)len+1));
6584     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6585 
6586     /*
6587      * Now get each key/value pair in turn...
6588      */
6589 
6590     for (i = 0; i < len; i++) {
6591         int flags;
6592         int store_flags = 0;
6593         /*
6594          * Get value first.
6595          */
6596 
6597         TRACEME(("(#%d) value", (int)i));
6598         sv = retrieve(aTHX_ cxt, 0);
6599         if (!sv)
6600             return (SV *) 0;
6601 
6602         GETMARK(flags);
6603 #ifdef HAS_RESTRICTED_HASHES
6604         if ((hash_flags & SHV_RESTRICTED) && (flags & SHV_K_LOCKED))
6605             SvREADONLY_on(sv);
6606 #endif
6607 
6608         if (flags & SHV_K_ISSV) {
6609             /* XXX you can't set a placeholder with an SV key.
6610                Then again, you can't get an SV key.
6611                Without messing around beyond what the API is supposed to do.
6612             */
6613             SV *keysv;
6614             TRACEME(("(#%d) keysv, flags=%d", (int)i, flags));
6615             keysv = retrieve(aTHX_ cxt, 0);
6616             if (!keysv)
6617                 return (SV *) 0;
6618 
6619             if (!hv_store_ent(hv, keysv, sv, 0))
6620                 return (SV *) 0;
6621         } else {
6622             /*
6623              * Get key.
6624              * Since we're reading into kbuf, we must ensure we're not
6625              * recursing between the read and the hv_store() where it's used.
6626              * Hence the key comes after the value.
6627              */
6628 
6629             if (flags & SHV_K_PLACEHOLDER) {
6630                 SvREFCNT_dec (sv);
6631                 sv = &PL_sv_placeholder;
6632                 store_flags |= HVhek_PLACEHOLD;
6633             }
6634             if (flags & SHV_K_UTF8) {
6635 #ifdef HAS_UTF8_HASHES
6636                 store_flags |= HVhek_UTF8;
6637 #else
6638                 if (cxt->use_bytes < 0)
6639                     cxt->use_bytes
6640                         = (SvTRUE(get_sv("Storable::drop_utf8", GV_ADD))
6641                            ? 1 : 0);
6642                 if (cxt->use_bytes == 0)
6643                     UTF8_CROAK();
6644 #endif
6645             }
6646 #ifdef HAS_UTF8_HASHES
6647             if (flags & SHV_K_WASUTF8)
6648                 store_flags |= HVhek_WASUTF8;
6649 #endif
6650 
6651             RLEN(size);		/* Get key size */
6652             KBUFCHK((STRLEN)size);/* Grow hash key read pool if needed */
6653             if (size)
6654                 READ(kbuf, size);
6655             kbuf[size] = '\0';	/* Mark string end, just in case */
6656             TRACEME(("(#%d) key '%s' flags %X store_flags %X", (int)i, kbuf,
6657                      flags, store_flags));
6658 
6659             /*
6660              * Enter key/value pair into hash table.
6661              */
6662 
6663 #ifdef HAS_RESTRICTED_HASHES
6664             if (hv_store_flags(hv, kbuf, size, sv, 0, store_flags) == 0)
6665                 return (SV *) 0;
6666 #else
6667             if (!(store_flags & HVhek_PLACEHOLD))
6668                 if (hv_store(hv, kbuf, size, sv, 0) == 0)
6669                     return (SV *) 0;
6670 #endif
6671         }
6672     }
6673 #ifdef HAS_RESTRICTED_HASHES
6674     if (hash_flags & SHV_RESTRICTED)
6675         SvREADONLY_on(hv);
6676 #endif
6677 
6678     TRACEME(("ok (retrieve_hash at 0x%" UVxf ")", PTR2UV(hv)));
6679 
6680     return (SV *) hv;
6681 }
6682 
6683 /*
6684  * retrieve_code
6685  *
6686  * Return a code reference.
6687  */
retrieve_code(pTHX_ stcxt_t * cxt,const char * cname)6688 static SV *retrieve_code(pTHX_ stcxt_t *cxt, const char *cname)
6689 {
6690 #if PERL_VERSION_LT(5,6,0)
6691     CROAK(("retrieve_code does not work with perl 5.005 or less\n"));
6692 #else
6693     dSP;
6694     I32 type, count;
6695     IV tagnum;
6696     SV *cv;
6697     SV *sv, *text, *sub, *errsv;
6698     HV *stash;
6699 
6700     TRACEME(("retrieve_code (#%d)", (int)cxt->tagnum));
6701 
6702     /*
6703      *  Insert dummy SV in the aseen array so that we don't screw
6704      *  up the tag numbers.  We would just make the internal
6705      *  scalar an untagged item in the stream, but
6706      *  retrieve_scalar() calls SEEN().  So we just increase the
6707      *  tag number.
6708      */
6709     tagnum = cxt->tagnum;
6710     sv = newSViv(0);
6711     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6712     SEEN_NN(sv, stash, 0);
6713 
6714     /*
6715      * Retrieve the source of the code reference
6716      * as a small or large scalar
6717      */
6718 
6719     GETMARK(type);
6720     switch (type) {
6721     case SX_SCALAR:
6722         text = retrieve_scalar(aTHX_ cxt, cname);
6723         break;
6724     case SX_LSCALAR:
6725         text = retrieve_lscalar(aTHX_ cxt, cname);
6726         break;
6727     case SX_UTF8STR:
6728         text = retrieve_utf8str(aTHX_ cxt, cname);
6729         break;
6730     case SX_LUTF8STR:
6731         text = retrieve_lutf8str(aTHX_ cxt, cname);
6732         break;
6733     default:
6734         CROAK(("Unexpected type %d in retrieve_code\n", (int)type));
6735     }
6736 
6737     if (!text) {
6738         CROAK(("Unable to retrieve code\n"));
6739     }
6740 
6741     /*
6742      * prepend "sub " to the source
6743      */
6744 
6745     sub = newSVpvs("sub ");
6746     if (SvUTF8(text))
6747         SvUTF8_on(sub);
6748     sv_catpv(sub, SvPV_nolen(text)); /* XXX no sv_catsv! */
6749     SvREFCNT_dec(text);
6750 
6751     /*
6752      * evaluate the source to a code reference and use the CV value
6753      */
6754 
6755     if (cxt->eval == NULL) {
6756         cxt->eval = get_sv("Storable::Eval", GV_ADD);
6757         SvREFCNT_inc(cxt->eval);
6758     }
6759     if (!SvTRUE(cxt->eval)) {
6760         if (cxt->forgive_me == 0 ||
6761             (cxt->forgive_me < 0 &&
6762              !(cxt->forgive_me = SvTRUE
6763                (get_sv("Storable::forgive_me", GV_ADD)) ? 1 : 0))
6764             ) {
6765             CROAK(("Can't eval, please set $Storable::Eval to a true value"));
6766         } else {
6767             sv = newSVsv(sub);
6768             /* fix up the dummy entry... */
6769             av_store(cxt->aseen, tagnum, SvREFCNT_inc(sv));
6770             return sv;
6771         }
6772     }
6773 
6774     ENTER;
6775     SAVETMPS;
6776 
6777     errsv = get_sv("@", GV_ADD);
6778     SvPVCLEAR(errsv);	/* clear $@ */
6779     if (SvROK(cxt->eval) && SvTYPE(SvRV(cxt->eval)) == SVt_PVCV) {
6780         PUSHMARK(sp);
6781         XPUSHs(sv_2mortal(newSVsv(sub)));
6782         PUTBACK;
6783         count = call_sv(cxt->eval, G_SCALAR);
6784         if (count != 1)
6785             CROAK(("Unexpected return value from $Storable::Eval callback\n"));
6786     } else {
6787         eval_sv(sub, G_SCALAR);
6788     }
6789     SPAGAIN;
6790     cv = POPs;
6791     PUTBACK;
6792 
6793     if (SvTRUE(errsv)) {
6794         CROAK(("code %s caused an error: %s",
6795                SvPV_nolen(sub), SvPV_nolen(errsv)));
6796     }
6797 
6798     if (cv && SvROK(cv) && SvTYPE(SvRV(cv)) == SVt_PVCV) {
6799         sv = SvRV(cv);
6800     } else {
6801         CROAK(("code %s did not evaluate to a subroutine reference\n",
6802                SvPV_nolen(sub)));
6803     }
6804 
6805     SvREFCNT_inc(sv); /* XXX seems to be necessary */
6806     SvREFCNT_dec(sub);
6807 
6808     FREETMPS;
6809     LEAVE;
6810     /* fix up the dummy entry... */
6811     av_store(cxt->aseen, tagnum, SvREFCNT_inc(sv));
6812 
6813     return sv;
6814 #endif
6815 }
6816 
retrieve_regexp(pTHX_ stcxt_t * cxt,const char * cname)6817 static SV *retrieve_regexp(pTHX_ stcxt_t *cxt, const char *cname) {
6818 #if PERL_VERSION_GE(5,8,0)
6819     int op_flags;
6820     U32 re_len;
6821     STRLEN flags_len;
6822     SV *re;
6823     SV *flags;
6824     SV *re_ref;
6825     SV *sv;
6826     dSP;
6827     I32 count;
6828     HV *stash;
6829 
6830     ENTER;
6831     SAVETMPS;
6832 
6833     GETMARK(op_flags);
6834     if (op_flags & SHR_U32_RE_LEN) {
6835         RLEN(re_len);
6836     }
6837     else
6838         GETMARK(re_len);
6839 
6840     re = sv_2mortal(NEWSV(10002, re_len ? re_len : 1));
6841     READ(SvPVX(re), re_len);
6842     SvCUR_set(re, re_len);
6843     *SvEND(re) = '\0';
6844     SvPOK_only(re);
6845 
6846     GETMARK(flags_len);
6847     flags = sv_2mortal(NEWSV(10002, flags_len ? flags_len : 1));
6848     READ(SvPVX(flags), flags_len);
6849     SvCUR_set(flags, flags_len);
6850     *SvEND(flags) = '\0';
6851     SvPOK_only(flags);
6852 
6853     PUSHMARK(SP);
6854 
6855     XPUSHs(re);
6856     XPUSHs(flags);
6857 
6858     PUTBACK;
6859 
6860     count = call_pv("Storable::_make_re", G_SCALAR);
6861 
6862     SPAGAIN;
6863 
6864     if (count != 1)
6865         CROAK(("Bad count %d calling _make_re", (int)count));
6866 
6867     re_ref = POPs;
6868 
6869     PUTBACK;
6870 
6871     if (!SvROK(re_ref))
6872       CROAK(("_make_re didn't return a reference"));
6873 
6874     sv = SvRV(re_ref);
6875     SvREFCNT_inc(sv);
6876     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6877     SEEN_NN(sv, stash, 0);
6878 
6879     FREETMPS;
6880     LEAVE;
6881 
6882     return sv;
6883 #else
6884     CROAK(("retrieve_regexp does not work with 5.6 or earlier"));
6885 #endif
6886 }
6887 
6888 /*
6889  * old_retrieve_array
6890  *
6891  * Retrieve a whole array in pre-0.6 binary format.
6892  *
6893  * Layout is SX_ARRAY <size> followed by each item, in increasing index order.
6894  * Each item is stored as SX_ITEM <object> or SX_IT_UNDEF for "holes".
6895  *
6896  * When we come here, SX_ARRAY has been read already.
6897  */
old_retrieve_array(pTHX_ stcxt_t * cxt,const char * cname)6898 static SV *old_retrieve_array(pTHX_ stcxt_t *cxt, const char *cname)
6899 {
6900     I32 len;
6901     I32 i;
6902     AV *av;
6903     SV *sv;
6904     int c;
6905 
6906     PERL_UNUSED_ARG(cname);
6907     TRACEME(("old_retrieve_array (#%d)", (int)cxt->tagnum));
6908 
6909     /*
6910      * Read length, and allocate array, then pre-extend it.
6911      */
6912 
6913     RLEN(len);
6914     TRACEME(("size = %d", (int)len));
6915     av = newAV();
6916     SEEN0_NN(av, 0);	/* Will return if array not allocated nicely */
6917     if (len)
6918         av_extend(av, len);
6919     else
6920         return (SV *) av;	/* No data follow if array is empty */
6921 
6922     /*
6923      * Now get each item in turn...
6924      */
6925 
6926     for (i = 0; i < len; i++) {
6927         GETMARK(c);
6928         if (c == SX_IT_UNDEF) {
6929             TRACEME(("(#%d) undef item", (int)i));
6930             continue;		/* av_extend() already filled us with undef */
6931         }
6932         if (c != SX_ITEM)
6933             (void) retrieve_other(aTHX_ cxt, 0);/* Will croak out */
6934         TRACEME(("(#%d) item", (int)i));
6935         sv = retrieve(aTHX_ cxt, 0);		/* Retrieve item */
6936         if (!sv)
6937             return (SV *) 0;
6938         if (av_store(av, i, sv) == 0)
6939             return (SV *) 0;
6940     }
6941 
6942     TRACEME(("ok (old_retrieve_array at 0x%" UVxf ")", PTR2UV(av)));
6943 
6944     return (SV *) av;
6945 }
6946 
6947 /*
6948  * old_retrieve_hash
6949  *
6950  * Retrieve a whole hash table in pre-0.6 binary format.
6951  *
6952  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
6953  * Keys are stored as SX_KEY <length> <data>, the <data> section being omitted
6954  * if length is 0.
6955  * Values are stored as SX_VALUE <object> or SX_VL_UNDEF for "holes".
6956  *
6957  * When we come here, SX_HASH has been read already.
6958  */
old_retrieve_hash(pTHX_ stcxt_t * cxt,const char * cname)6959 static SV *old_retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname)
6960 {
6961     I32 len;
6962     I32 size;
6963     I32 i;
6964     HV *hv;
6965     SV *sv = (SV *) 0;
6966     int c;
6967     SV *sv_h_undef = (SV *) 0;	/* hv_store() bug */
6968 
6969     PERL_UNUSED_ARG(cname);
6970     TRACEME(("old_retrieve_hash (#%d)", (int)cxt->tagnum));
6971 
6972     /*
6973      * Read length, allocate table.
6974      */
6975 
6976     RLEN(len);
6977     TRACEME(("size = %d", (int)len));
6978     hv = newHV();
6979     SEEN0_NN(hv, 0);		/* Will return if table not allocated properly */
6980     if (len == 0)
6981         return (SV *) hv;	/* No data follow if table empty */
6982     TRACEME(("split %d", (int)len+1));
6983     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6984 
6985     /*
6986      * Now get each key/value pair in turn...
6987      */
6988 
6989     for (i = 0; i < len; i++) {
6990         /*
6991          * Get value first.
6992          */
6993 
6994         GETMARK(c);
6995         if (c == SX_VL_UNDEF) {
6996             TRACEME(("(#%d) undef value", (int)i));
6997             /*
6998              * Due to a bug in hv_store(), it's not possible to pass
6999              * &PL_sv_undef to hv_store() as a value, otherwise the
7000              * associated key will not be creatable any more. -- RAM, 14/01/97
7001              */
7002             if (!sv_h_undef)
7003                 sv_h_undef = newSVsv(&PL_sv_undef);
7004             sv = SvREFCNT_inc(sv_h_undef);
7005         } else if (c == SX_VALUE) {
7006             TRACEME(("(#%d) value", (int)i));
7007             sv = retrieve(aTHX_ cxt, 0);
7008             if (!sv)
7009                 return (SV *) 0;
7010         } else
7011             (void) retrieve_other(aTHX_ cxt, 0); /* Will croak out */
7012 
7013         /*
7014          * Get key.
7015          * Since we're reading into kbuf, we must ensure we're not
7016          * recursing between the read and the hv_store() where it's used.
7017          * Hence the key comes after the value.
7018          */
7019 
7020         GETMARK(c);
7021         if (c != SX_KEY)
7022             (void) retrieve_other(aTHX_ cxt, 0); /* Will croak out */
7023         RLEN(size);				/* Get key size */
7024         KBUFCHK((STRLEN)size);			/* Grow hash key read pool if needed */
7025         if (size)
7026             READ(kbuf, size);
7027         kbuf[size] = '\0';			/* Mark string end, just in case */
7028         TRACEME(("(#%d) key '%s'", (int)i, kbuf));
7029 
7030         /*
7031          * Enter key/value pair into hash table.
7032          */
7033 
7034         if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
7035             return (SV *) 0;
7036     }
7037 
7038     TRACEME(("ok (retrieve_hash at 0x%" UVxf ")", PTR2UV(hv)));
7039 
7040     return (SV *) hv;
7041 }
7042 
7043 /***
7044  *** Retrieval engine.
7045  ***/
7046 
7047 /*
7048  * magic_check
7049  *
7050  * Make sure the stored data we're trying to retrieve has been produced
7051  * on an ILP compatible system with the same byteorder. It croaks out in
7052  * case an error is detected. [ILP = integer-long-pointer sizes]
7053  * Returns null if error is detected, &PL_sv_undef otherwise.
7054  *
7055  * Note that there's no byte ordering info emitted when network order was
7056  * used at store time.
7057  */
magic_check(pTHX_ stcxt_t * cxt)7058 static SV *magic_check(pTHX_ stcxt_t *cxt)
7059 {
7060     /* The worst case for a malicious header would be old magic (which is
7061        longer), major, minor, byteorder length byte of 255, 255 bytes of
7062        garbage, sizeof int, long, pointer, NV.
7063        So the worse of that we can read is 255 bytes of garbage plus 4.
7064        Err, I am assuming 8 bit bytes here. Please file a bug report if you're
7065        compiling perl on a system with chars that are larger than 8 bits.
7066        (Even Crays aren't *that* perverse).
7067     */
7068     unsigned char buf[4 + 255];
7069     unsigned char *current;
7070     int c;
7071     int length;
7072     int use_network_order;
7073     int use_NV_size;
7074     int old_magic = 0;
7075     int version_major;
7076     int version_minor = 0;
7077 
7078     TRACEME(("magic_check"));
7079 
7080     /*
7081      * The "magic number" is only for files, not when freezing in memory.
7082      */
7083 
7084     if (cxt->fio) {
7085         /* This includes the '\0' at the end.  I want to read the extra byte,
7086            which is usually going to be the major version number.  */
7087         STRLEN len = sizeof(magicstr);
7088         STRLEN old_len;
7089 
7090         READ(buf, (SSize_t)(len));	/* Not null-terminated */
7091 
7092         /* Point at the byte after the byte we read.  */
7093         current = buf + --len;	/* Do the -- outside of macros.  */
7094 
7095         if (memNE(buf, magicstr, len)) {
7096             /*
7097              * Try to read more bytes to check for the old magic number, which
7098              * was longer.
7099              */
7100 
7101             TRACEME(("trying for old magic number"));
7102 
7103             old_len = sizeof(old_magicstr) - 1;
7104             READ(current + 1, (SSize_t)(old_len - len));
7105 
7106             if (memNE(buf, old_magicstr, old_len))
7107                 CROAK(("File is not a perl storable"));
7108             old_magic++;
7109             current = buf + old_len;
7110         }
7111         use_network_order = *current;
7112     } else {
7113         GETMARK(use_network_order);
7114     }
7115 
7116     /*
7117      * Starting with 0.6, the "use_network_order" byte flag is also used to
7118      * indicate the version number of the binary, and therefore governs the
7119      * setting of sv_retrieve_vtbl. See magic_write().
7120      */
7121     if (old_magic && use_network_order > 1) {
7122         /*  0.1 dump - use_network_order is really byte order length */
7123         version_major = -1;
7124     }
7125     else {
7126         version_major = use_network_order >> 1;
7127     }
7128     cxt->retrieve_vtbl = (SV*(**)(pTHX_ stcxt_t *cxt, const char *cname)) (version_major > 0 ? sv_retrieve : sv_old_retrieve);
7129 
7130     TRACEME(("magic_check: netorder = 0x%x", use_network_order));
7131 
7132 
7133     /*
7134      * Starting with 0.7 (binary major 2), a full byte is dedicated to the
7135      * minor version of the protocol.  See magic_write().
7136      */
7137 
7138     if (version_major > 1)
7139         GETMARK(version_minor);
7140 
7141     cxt->ver_major = version_major;
7142     cxt->ver_minor = version_minor;
7143 
7144     TRACEME(("binary image version is %d.%d", version_major, version_minor));
7145 
7146     /*
7147      * Inter-operability sanity check: we can't retrieve something stored
7148      * using a format more recent than ours, because we have no way to
7149      * know what has changed, and letting retrieval go would mean a probable
7150      * failure reporting a "corrupted" storable file.
7151      */
7152 
7153     if (
7154         version_major > STORABLE_BIN_MAJOR ||
7155         (version_major == STORABLE_BIN_MAJOR &&
7156          version_minor > STORABLE_BIN_MINOR)
7157         ) {
7158         int croak_now = 1;
7159         TRACEME(("but I am version is %d.%d", STORABLE_BIN_MAJOR,
7160                  STORABLE_BIN_MINOR));
7161 
7162         if (version_major == STORABLE_BIN_MAJOR) {
7163             TRACEME(("cxt->accept_future_minor is %d",
7164                      cxt->accept_future_minor));
7165             if (cxt->accept_future_minor < 0)
7166                 cxt->accept_future_minor
7167                     = (SvTRUE(get_sv("Storable::accept_future_minor",
7168                                           GV_ADD))
7169                        ? 1 : 0);
7170             if (cxt->accept_future_minor == 1)
7171                 croak_now = 0;  /* Don't croak yet.  */
7172         }
7173         if (croak_now) {
7174             CROAK(("Storable binary image v%d.%d more recent than I am (v%d.%d)",
7175                    version_major, version_minor,
7176                    STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
7177         }
7178     }
7179 
7180     /*
7181      * If they stored using network order, there's no byte ordering
7182      * information to check.
7183      */
7184 
7185     if ((cxt->netorder = (use_network_order & 0x1)))	/* Extra () for -Wall */
7186         return &PL_sv_undef;			/* No byte ordering info */
7187 
7188     /* In C truth is 1, falsehood is 0. Very convenient.  */
7189     use_NV_size = version_major >= 2 && version_minor >= 2;
7190 
7191     if (version_major >= 0) {
7192         GETMARK(c);
7193     }
7194     else {
7195         c = use_network_order;
7196     }
7197     length = c + 3 + use_NV_size;
7198     READ(buf, length);	/* Not null-terminated */
7199 
7200     TRACEME(("byte order '%.*s' %d", c, buf, c));
7201 
7202 #ifdef USE_56_INTERWORK_KLUDGE
7203     /* No point in caching this in the context as we only need it once per
7204        retrieve, and we need to recheck it each read.  */
7205     if (SvTRUE(get_sv("Storable::interwork_56_64bit", GV_ADD))) {
7206         if ((c != (sizeof (byteorderstr_56) - 1))
7207             || memNE(buf, byteorderstr_56, c))
7208             CROAK(("Byte order is not compatible"));
7209     } else
7210 #endif
7211     {
7212         if ((c != (sizeof (byteorderstr) - 1))
7213           || memNE(buf, byteorderstr, c))
7214             CROAK(("Byte order is not compatible"));
7215     }
7216 
7217     current = buf + c;
7218 
7219     /* sizeof(int) */
7220     if ((int) *current++ != sizeof(int))
7221         CROAK(("Integer size is not compatible"));
7222 
7223     /* sizeof(long) */
7224     if ((int) *current++ != sizeof(long))
7225         CROAK(("Long integer size is not compatible"));
7226 
7227     /* sizeof(char *) */
7228     if ((int) *current != sizeof(char *))
7229         CROAK(("Pointer size is not compatible"));
7230 
7231     if (use_NV_size) {
7232         /* sizeof(NV) */
7233         if ((int) *++current != sizeof(NV))
7234             CROAK(("Double size is not compatible"));
7235     }
7236 
7237     return &PL_sv_undef;	/* OK */
7238 }
7239 
7240 /*
7241  * retrieve
7242  *
7243  * Recursively retrieve objects from the specified file and return their
7244  * root SV (which may be an AV or an HV for what we care).
7245  * Returns null if there is a problem.
7246  */
retrieve(pTHX_ stcxt_t * cxt,const char * cname)7247 static SV *retrieve(pTHX_ stcxt_t *cxt, const char *cname)
7248 {
7249     int type;
7250     SV **svh;
7251     SV *sv;
7252 
7253     TRACEME(("retrieve"));
7254 
7255     /*
7256      * Grab address tag which identifies the object if we are retrieving
7257      * an older format. Since the new binary format counts objects and no
7258      * longer explicitly tags them, we must keep track of the correspondence
7259      * ourselves.
7260      *
7261      * The following section will disappear one day when the old format is
7262      * no longer supported, hence the final "goto" in the "if" block.
7263      */
7264 
7265     if (cxt->hseen) {			/* Retrieving old binary */
7266         stag_t tag;
7267         if (cxt->netorder) {
7268             I32 nettag;
7269             READ(&nettag, sizeof(I32));	/* Ordered sequence of I32 */
7270             tag = (stag_t) nettag;
7271         } else
7272             READ(&tag, sizeof(stag_t));	/* Original address of the SV */
7273 
7274         GETMARK(type);
7275         if (type == SX_OBJECT) {
7276             I32 tagn;
7277             svh = hv_fetch(cxt->hseen, (char *) &tag, sizeof(tag), FALSE);
7278             if (!svh)
7279                 CROAK(("Old tag 0x%" UVxf " should have been mapped already",
7280                        (UV) tag));
7281             tagn = SvIV(*svh);	/* Mapped tag number computed earlier below */
7282 
7283             /*
7284              * The following code is common with the SX_OBJECT case below.
7285              */
7286 
7287             svh = av_fetch(cxt->aseen, tagn, FALSE);
7288             if (!svh)
7289                 CROAK(("Object #%" IVdf " should have been retrieved already",
7290                        (IV) tagn));
7291             sv = *svh;
7292             TRACEME(("has retrieved #%d at 0x%" UVxf, (int)tagn, PTR2UV(sv)));
7293             SvREFCNT_inc(sv);	/* One more reference to this same sv */
7294             return sv;		/* The SV pointer where object was retrieved */
7295         }
7296 
7297         /*
7298          * Map new object, but don't increase tagnum. This will be done
7299          * by each of the retrieve_* functions when they call SEEN().
7300          *
7301          * The mapping associates the "tag" initially present with a unique
7302          * tag number. See test for SX_OBJECT above to see how this is perused.
7303          */
7304 
7305         if (!hv_store(cxt->hseen, (char *) &tag, sizeof(tag),
7306                       newSViv(cxt->tagnum), 0))
7307             return (SV *) 0;
7308 
7309         goto first_time;
7310     }
7311 
7312     /*
7313      * Regular post-0.6 binary format.
7314      */
7315 
7316     GETMARK(type);
7317 
7318     TRACEME(("retrieve type = %d", type));
7319 
7320     /*
7321      * Are we dealing with an object we should have already retrieved?
7322      */
7323 
7324     if (type == SX_OBJECT) {
7325         I32 tag;
7326         READ_I32(tag);
7327         tag = ntohl(tag);
7328 #ifndef HAS_U64
7329         /* A 32-bit system can't have over 2**31 objects anyway */
7330         if (tag < 0)
7331             CROAK(("Object #%" IVdf " out of range", (IV)tag));
7332 #endif
7333         /* Older versions of Storable on with 64-bit support on 64-bit
7334            systems can produce values above the 2G boundary (or wrapped above
7335            the 4G boundary, which we can't do much about), treat those as
7336            unsigned.
7337            This same commit stores tag ids over the 2G boundary as long tags
7338            since older Storables will mis-handle them as short tags.
7339          */
7340         svh = av_fetch(cxt->aseen, (U32)tag, FALSE);
7341         if (!svh)
7342             CROAK(("Object #%" IVdf " should have been retrieved already",
7343                    (IV) tag));
7344         sv = *svh;
7345         TRACEME(("had retrieved #%d at 0x%" UVxf, (int)tag, PTR2UV(sv)));
7346         SvREFCNT_inc(sv);	/* One more reference to this same sv */
7347         return sv;		/* The SV pointer where object was retrieved */
7348     } else if (type >= SX_LAST && cxt->ver_minor > STORABLE_BIN_MINOR) {
7349         if (cxt->accept_future_minor < 0)
7350             cxt->accept_future_minor
7351                 = (SvTRUE(get_sv("Storable::accept_future_minor",
7352                                       GV_ADD))
7353                    ? 1 : 0);
7354         if (cxt->accept_future_minor == 1) {
7355             CROAK(("Storable binary image v%d.%d contains data of type %d. "
7356                    "This Storable is v%d.%d and can only handle data types up to %d",
7357                    cxt->ver_major, cxt->ver_minor, type,
7358                    STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR, SX_LAST - 1));
7359         }
7360     }
7361 
7362  first_time:	/* Will disappear when support for old format is dropped */
7363 
7364     /*
7365      * Okay, first time through for this one.
7366      */
7367 
7368     sv = RETRIEVE(cxt, type)(aTHX_ cxt, cname);
7369     if (!sv)
7370         return (SV *) 0;		/* Failed */
7371 
7372     /*
7373      * Old binary formats (pre-0.7).
7374      *
7375      * Final notifications, ended by SX_STORED may now follow.
7376      * Currently, the only pertinent notification to apply on the
7377      * freshly retrieved object is either:
7378      *    SX_CLASS <char-len> <classname> for short classnames.
7379      *    SX_LG_CLASS <int-len> <classname> for larger one (rare!).
7380      * Class name is then read into the key buffer pool used by
7381      * hash table key retrieval.
7382      */
7383 
7384     if (cxt->ver_major < 2) {
7385         while ((type = GETCHAR()) != SX_STORED) {
7386             I32 len;
7387             HV* stash;
7388             switch (type) {
7389             case SX_CLASS:
7390                 GETMARK(len);		/* Length coded on a single char */
7391                 break;
7392             case SX_LG_CLASS:		/* Length coded on a regular integer */
7393                 RLEN(len);
7394                 break;
7395             case EOF:
7396             default:
7397                 return (SV *) 0;	/* Failed */
7398             }
7399             KBUFCHK((STRLEN)len);	/* Grow buffer as necessary */
7400             if (len)
7401                 READ(kbuf, len);
7402             kbuf[len] = '\0';		/* Mark string end */
7403             stash = gv_stashpvn(kbuf, len, GV_ADD);
7404             BLESS(sv, stash);
7405         }
7406     }
7407 
7408     TRACEME(("ok (retrieved 0x%" UVxf ", refcnt=%d, %s)", PTR2UV(sv),
7409              (int)SvREFCNT(sv) - 1, sv_reftype(sv, FALSE)));
7410 
7411     return sv;	/* Ok */
7412 }
7413 
7414 /*
7415  * do_retrieve
7416  *
7417  * Retrieve data held in file and return the root object.
7418  * Common routine for pretrieve and mretrieve.
7419  */
do_retrieve(pTHX_ PerlIO * f,SV * in,int optype,int flags)7420 static SV *do_retrieve(
7421                        pTHX_
7422                        PerlIO *f,
7423                        SV *in,
7424                        int optype,
7425                        int flags)
7426 {
7427     dSTCXT;
7428     SV *sv;
7429     int is_tainted;		/* Is input source tainted? */
7430     int pre_06_fmt = 0;		/* True with pre Storable 0.6 formats */
7431 
7432     TRACEMED(("do_retrieve (optype = 0x%x, flags=0x%x)",
7433 	     (unsigned)optype, (unsigned)flags));
7434 
7435     optype |= ST_RETRIEVE;
7436     cxt->flags = flags;
7437 
7438     /*
7439      * Sanity assertions for retrieve dispatch tables.
7440      */
7441 
7442     ASSERT(sizeof(sv_old_retrieve) == sizeof(sv_retrieve),
7443            ("old and new retrieve dispatch table have same size"));
7444     ASSERT(sv_old_retrieve[(int)SX_LAST] == retrieve_other,
7445            ("SX_LAST entry correctly initialized in old dispatch table"));
7446     ASSERT(sv_retrieve[(int)SX_LAST] == retrieve_other,
7447            ("SX_LAST entry correctly initialized in new dispatch table"));
7448 
7449     /*
7450      * Workaround for CROAK leak: if they enter with a "dirty" context,
7451      * free up memory for them now.
7452      */
7453 
7454     assert(cxt);
7455     if (cxt->s_dirty)
7456         clean_context(aTHX_ cxt);
7457 
7458     /*
7459      * Now that STORABLE_xxx hooks exist, it is possible that they try to
7460      * re-enter retrieve() via the hooks.
7461      */
7462 
7463     if (cxt->entry) {
7464         cxt = allocate_context(aTHX_ cxt);
7465         cxt->flags = flags;
7466     }
7467     INIT_TRACEME;
7468 
7469     cxt->entry++;
7470 
7471     ASSERT(cxt->entry == 1, ("starting new recursion"));
7472     ASSERT(!cxt->s_dirty, ("clean context"));
7473 
7474     /*
7475      * Prepare context.
7476      *
7477      * Data is loaded into the memory buffer when f is NULL, unless 'in' is
7478      * also NULL, in which case we're expecting the data to already lie
7479      * in the buffer (dclone case).
7480      */
7481 
7482     KBUFINIT();			 /* Allocate hash key reading pool once */
7483 
7484     if (!f && in) {
7485 #ifdef SvUTF8_on
7486         if (SvUTF8(in)) {
7487             STRLEN length;
7488             const char *orig = SvPV(in, length);
7489             char *asbytes;
7490             /* This is quite deliberate. I want the UTF8 routines
7491                to encounter the '\0' which perl adds at the end
7492                of all scalars, so that any new string also has
7493                this.
7494             */
7495             STRLEN klen_tmp = length + 1;
7496             bool is_utf8 = TRUE;
7497 
7498             /* Just casting the &klen to (STRLEN) won't work
7499                well if STRLEN and I32 are of different widths.
7500                --jhi */
7501             asbytes = (char*)bytes_from_utf8((U8*)orig,
7502                                              &klen_tmp,
7503                                              &is_utf8);
7504             if (is_utf8) {
7505                 CROAK(("Frozen string corrupt - contains characters outside 0-255"));
7506             }
7507             if (asbytes != orig) {
7508                 /* String has been converted.
7509                    There is no need to keep any reference to
7510                    the old string.  */
7511                 in = sv_newmortal();
7512                 /* We donate the SV the malloc()ed string
7513                    bytes_from_utf8 returned us.  */
7514                 SvUPGRADE(in, SVt_PV);
7515                 SvPOK_on(in);
7516                 SvPV_set(in, asbytes);
7517                 SvLEN_set(in, klen_tmp);
7518                 SvCUR_set(in, klen_tmp - 1);
7519             }
7520         }
7521 #endif
7522         MBUF_SAVE_AND_LOAD(in);
7523     }
7524 
7525     /*
7526      * Magic number verifications.
7527      *
7528      * This needs to be done before calling init_retrieve_context()
7529      * since the format indication in the file are necessary to conduct
7530      * some of the initializations.
7531      */
7532 
7533     cxt->fio = f;			/* Where I/O are performed */
7534 
7535     if (!magic_check(aTHX_ cxt))
7536         CROAK(("Magic number checking on storable %s failed",
7537                cxt->fio ? "file" : "string"));
7538 
7539     TRACEME(("data stored in %s format",
7540              cxt->netorder ? "net order" : "native"));
7541 
7542     /*
7543      * Check whether input source is tainted, so that we don't wrongly
7544      * taint perfectly good values...
7545      *
7546      * We assume file input is always tainted.  If both 'f' and 'in' are
7547      * NULL, then we come from dclone, and tainted is already filled in
7548      * the context.  That's a kludge, but the whole dclone() thing is
7549      * already quite a kludge anyway! -- RAM, 15/09/2000.
7550      */
7551 
7552     is_tainted = f ? 1 : (in ? SvTAINTED(in) : cxt->s_tainted);
7553     TRACEME(("input source is %s", is_tainted ? "tainted" : "trusted"));
7554     init_retrieve_context(aTHX_ cxt, optype, is_tainted);
7555 
7556     ASSERT(is_retrieving(aTHX), ("within retrieve operation"));
7557 
7558     sv = retrieve(aTHX_ cxt, 0); /* Recursively retrieve object, get root SV */
7559 
7560     /*
7561      * Final cleanup.
7562      */
7563 
7564     if (!f && in)
7565         MBUF_RESTORE();
7566 
7567     pre_06_fmt = cxt->hseen != NULL;	/* Before we clean context */
7568 
7569     /*
7570      * The "root" context is never freed.
7571      */
7572 
7573     clean_retrieve_context(aTHX_ cxt);
7574     if (cxt->prev)			/* This context was stacked */
7575         free_context(aTHX_ cxt);	/* It was not the "root" context */
7576 
7577     /*
7578      * Prepare returned value.
7579      */
7580 
7581     if (!sv) {
7582         TRACEMED(("retrieve ERROR"));
7583 #if PERL_VERSION_LT(5,5,0)
7584         /* perl 5.00405 seems to screw up at this point with an
7585            'attempt to modify a read only value' error reported in the
7586            eval { $self = pretrieve(*FILE) } in _retrieve.
7587            I can't see what the cause of this error is, but I suspect a
7588            bug in 5.004, as it seems to be capable of issuing spurious
7589            errors or core dumping with matches on $@. I'm not going to
7590            spend time on what could be a fruitless search for the cause,
7591            so here's a bodge. If you're running 5.004 and don't like
7592            this inefficiency, either upgrade to a newer perl, or you are
7593            welcome to find the problem and send in a patch.
7594         */
7595         return newSV(0);
7596 #else
7597         return &PL_sv_undef;		/* Something went wrong, return undef */
7598 #endif
7599     }
7600 
7601     TRACEMED(("retrieve got %s(0x%" UVxf ")",
7602              sv_reftype(sv, FALSE), PTR2UV(sv)));
7603 
7604     /*
7605      * Backward compatibility with Storable-0.5@9 (which we know we
7606      * are retrieving if hseen is non-null): don't create an extra RV
7607      * for objects since we special-cased it at store time.
7608      *
7609      * Build a reference to the SV returned by pretrieve even if it is
7610      * already one and not a scalar, for consistency reasons.
7611      */
7612 
7613     if (pre_06_fmt) {			/* Was not handling overloading by then */
7614         SV *rv;
7615         TRACEMED(("fixing for old formats -- pre 0.6"));
7616         if (sv_type(aTHX_ sv) == svis_REF && (rv = SvRV(sv)) && SvOBJECT(rv)) {
7617             TRACEME(("ended do_retrieve() with an object -- pre 0.6"));
7618             return sv;
7619         }
7620     }
7621 
7622     /*
7623      * If reference is overloaded, restore behaviour.
7624      *
7625      * NB: minor glitch here: normally, overloaded refs are stored specially
7626      * so that we can croak when behaviour cannot be re-installed, and also
7627      * avoid testing for overloading magic at each reference retrieval.
7628      *
7629      * Unfortunately, the root reference is implicitly stored, so we must
7630      * check for possible overloading now.  Furthermore, if we don't restore
7631      * overloading, we cannot croak as if the original ref was, because we
7632      * have no way to determine whether it was an overloaded ref or not in
7633      * the first place.
7634      *
7635      * It's a pity that overloading magic is attached to the rv, and not to
7636      * the underlying sv as blessing is.
7637      */
7638 
7639     if (SvOBJECT(sv)) {
7640         HV *stash = (HV *) SvSTASH(sv);
7641         SV *rv = newRV_noinc(sv);
7642         if (stash && Gv_AMG(stash)) {
7643             SvAMAGIC_on(rv);
7644             TRACEMED(("restored overloading on root reference"));
7645         }
7646         TRACEMED(("ended do_retrieve() with an object"));
7647         return rv;
7648     }
7649 
7650     TRACEMED(("regular do_retrieve() end"));
7651 
7652     return newRV_noinc(sv);
7653 }
7654 
7655 /*
7656  * pretrieve
7657  *
7658  * Retrieve data held in file and return the root object, undef on error.
7659  */
pretrieve(pTHX_ PerlIO * f,IV flag)7660 static SV *pretrieve(pTHX_ PerlIO *f, IV flag)
7661 {
7662     TRACEMED(("pretrieve"));
7663     return do_retrieve(aTHX_ f, Nullsv, 0, (int)flag);
7664 }
7665 
7666 /*
7667  * mretrieve
7668  *
7669  * Retrieve data held in scalar and return the root object, undef on error.
7670  */
mretrieve(pTHX_ SV * sv,IV flag)7671 static SV *mretrieve(pTHX_ SV *sv, IV flag)
7672 {
7673     TRACEMED(("mretrieve"));
7674     return do_retrieve(aTHX_ (PerlIO*) 0, sv, 0, (int)flag);
7675 }
7676 
7677 /***
7678  *** Deep cloning
7679  ***/
7680 
7681 /*
7682  * dclone
7683  *
7684  * Deep clone: returns a fresh copy of the original referenced SV tree.
7685  *
7686  * This is achieved by storing the object in memory and restoring from
7687  * there. Not that efficient, but it should be faster than doing it from
7688  * pure perl anyway.
7689  */
dclone(pTHX_ SV * sv)7690 static SV *dclone(pTHX_ SV *sv)
7691 {
7692     dSTCXT;
7693     STRLEN size;
7694     stcxt_t *real_context;
7695     SV *out;
7696 
7697     TRACEMED(("dclone"));
7698 
7699     /*
7700      * Workaround for CROAK leak: if they enter with a "dirty" context,
7701      * free up memory for them now.
7702      */
7703 
7704     assert(cxt);
7705     if (cxt->s_dirty)
7706         clean_context(aTHX_ cxt);
7707 
7708     /*
7709      * Tied elements seem to need special handling.
7710      */
7711 
7712     if ((SvTYPE(sv) == SVt_PVLV
7713 #if PERL_VERSION_LT(5,8,0)
7714          || SvTYPE(sv) == SVt_PVMG
7715 #endif
7716          ) && (SvFLAGS(sv) & (SVs_GMG|SVs_SMG|SVs_RMG)) ==
7717         (SVs_GMG|SVs_SMG|SVs_RMG) &&
7718         mg_find(sv, 'p')) {
7719         mg_get(sv);
7720     }
7721 
7722     /*
7723      * do_store() optimizes for dclone by not freeing its context, should
7724      * we need to allocate one because we're deep cloning from a hook.
7725      */
7726 
7727     if (!do_store(aTHX_ (PerlIO*) 0, sv, ST_CLONE, FALSE, (SV**) 0))
7728         return &PL_sv_undef;				/* Error during store */
7729 
7730     /*
7731      * Because of the above optimization, we have to refresh the context,
7732      * since a new one could have been allocated and stacked by do_store().
7733      */
7734 
7735     { dSTCXT; real_context = cxt; }		/* Sub-block needed for macro */
7736     cxt = real_context;					/* And we need this temporary... */
7737 
7738     /*
7739      * Now, 'cxt' may refer to a new context.
7740      */
7741 
7742     assert(cxt);
7743     ASSERT(!cxt->s_dirty, ("clean context"));
7744     ASSERT(!cxt->entry, ("entry will not cause new context allocation"));
7745 
7746     size = MBUF_SIZE();
7747     TRACEME(("dclone stored %ld bytes", (long)size));
7748     MBUF_INIT(size);
7749 
7750     /*
7751      * Since we're passing do_retrieve() both a NULL file and sv, we need
7752      * to pre-compute the taintedness of the input by setting cxt->tainted
7753      * to whatever state our own input string was.	-- RAM, 15/09/2000
7754      *
7755      * do_retrieve() will free non-root context.
7756      */
7757 
7758     cxt->s_tainted = SvTAINTED(sv);
7759     out = do_retrieve(aTHX_ (PerlIO*) 0, Nullsv, ST_CLONE, FLAG_BLESS_OK | FLAG_TIE_OK);
7760 
7761     TRACEMED(("dclone returns 0x%" UVxf, PTR2UV(out)));
7762 
7763     return out;
7764 }
7765 
7766 /***
7767  *** Glue with perl.
7768  ***/
7769 
7770 /*
7771  * The Perl IO GV object distinguishes between input and output for sockets
7772  * but not for plain files. To allow Storable to transparently work on
7773  * plain files and sockets transparently, we have to ask xsubpp to fetch the
7774  * right object for us. Hence the OutputStream and InputStream declarations.
7775  *
7776  * Before perl 5.004_05, those entries in the standard typemap are not
7777  * defined in perl include files, so we do that here.
7778  */
7779 
7780 #ifndef OutputStream
7781 #define OutputStream	PerlIO *
7782 #define InputStream	PerlIO *
7783 #endif	/* !OutputStream */
7784 
7785 static int
storable_free(pTHX_ SV * sv,MAGIC * mg)7786 storable_free(pTHX_ SV *sv, MAGIC* mg) {
7787     stcxt_t *cxt = (stcxt_t *)SvPVX(sv);
7788 
7789     PERL_UNUSED_ARG(mg);
7790 #ifdef USE_PTR_TABLE
7791     if (cxt->pseen)
7792         ptr_table_free(cxt->pseen);
7793 #endif
7794     if (kbuf)
7795         Safefree(kbuf);
7796     if (!cxt->membuf_ro && mbase)
7797         Safefree(mbase);
7798     if (cxt->membuf_ro && (cxt->msaved).arena)
7799         Safefree((cxt->msaved).arena);
7800     return 0;
7801 }
7802 
7803 MODULE = Storable	PACKAGE = Storable
7804 
7805 PROTOTYPES: ENABLE
7806 
7807 BOOT:
7808 {
7809     HV *stash = gv_stashpvn("Storable", 8, GV_ADD);
7810     newCONSTSUB(stash, "BIN_MAJOR", newSViv(STORABLE_BIN_MAJOR));
7811     newCONSTSUB(stash, "BIN_MINOR", newSViv(STORABLE_BIN_MINOR));
7812     newCONSTSUB(stash, "BIN_WRITE_MINOR", newSViv(STORABLE_BIN_WRITE_MINOR));
7813 
7814     newCONSTSUB(stash, "CAN_FLOCK", CAN_FLOCK);
7815 
7816     init_perinterp(aTHX);
7817     gv_fetchpv("Storable::drop_utf8",   GV_ADDMULTI, SVt_PV);
7818 #ifdef DEBUGME
7819     /* Only disable the used only once warning if we are in debugging mode.  */
7820     gv_fetchpv("Storable::DEBUGME",   GV_ADDMULTI, SVt_PV);
7821 #endif
7822 #ifdef USE_56_INTERWORK_KLUDGE
7823     gv_fetchpv("Storable::interwork_56_64bit",   GV_ADDMULTI, SVt_PV);
7824 #endif
7825     }
7826 
7827 void
7828 init_perinterp()
7829 CODE:
7830     init_perinterp(aTHX);
7831 
7832 # pstore
7833 #
7834 # Store the transitive data closure of given object to disk.
7835 # Returns undef on error, a true value otherwise.
7836 
7837 # net_pstore
7838 #
7839 # Same as pstore(), but network order is used for integers and doubles are
7840 # emitted as strings.
7841 
7842 SV *
7843 pstore(f,obj)
7844     OutputStream f
7845     SV*		obj
7846 ALIAS:
7847     net_pstore = 1
7848 PPCODE:
7849     RETVAL = do_store(aTHX_ f, obj, 0, ix, (SV **)0) ? &PL_sv_yes : &PL_sv_undef;
7850     /* do_store() can reallocate the stack, so need a sequence point to ensure
7851        that ST(0) knows about it. Hence using two statements.  */
7852     ST(0) = RETVAL;
7853     XSRETURN(1);
7854 
7855 # mstore
7856 #
7857 # Store the transitive data closure of given object to memory.
7858 # Returns undef on error, a scalar value containing the data otherwise.
7859 
7860 # net_mstore
7861 #
7862 # Same as mstore(), but network order is used for integers and doubles are
7863 # emitted as strings.
7864 
7865 SV *
7866 mstore(obj)
7867     SV*	obj
7868 ALIAS:
7869     net_mstore = 1
7870 CODE:
7871     RETVAL = &PL_sv_undef;
7872     if (!do_store(aTHX_ (PerlIO*) 0, obj, 0, ix, &RETVAL))
7873         RETVAL = &PL_sv_undef;
7874 OUTPUT:
7875     RETVAL
7876 
7877 SV *
7878 pretrieve(f, flag = 6)
7879     InputStream	f
7880     IV		flag
7881 CODE:
7882     RETVAL = pretrieve(aTHX_ f, flag);
7883 OUTPUT:
7884     RETVAL
7885 
7886 SV *
7887 mretrieve(sv, flag = 6)
7888     SV* sv
7889     IV	flag
7890 CODE:
7891     RETVAL = mretrieve(aTHX_ sv, flag);
7892 OUTPUT:
7893     RETVAL
7894 
7895 SV *
7896 dclone(sv)
7897     SV*	sv
7898 CODE:
7899     RETVAL = dclone(aTHX_ sv);
7900 OUTPUT:
7901     RETVAL
7902 
7903 void
7904 last_op_in_netorder()
7905 ALIAS:
7906     is_storing = ST_STORE
7907     is_retrieving = ST_RETRIEVE
7908 PREINIT:
7909     bool result;
7910 CODE:
7911     if (ix) {
7912         dSTCXT;
7913         assert(cxt);
7914         result = cxt->entry && (cxt->optype & ix) ? TRUE : FALSE;
7915     } else {
7916         result = !!last_op_in_netorder(aTHX);
7917     }
7918     ST(0) = boolSV(result);
7919 
7920 
7921 IV
7922 stack_depth()
7923 CODE:
7924     RETVAL = SvIV(get_sv("Storable::recursion_limit", GV_ADD));
7925 OUTPUT:
7926     RETVAL
7927 
7928 IV
7929 stack_depth_hash()
7930 CODE:
7931     RETVAL = SvIV(get_sv("Storable::recursion_limit_hash", GV_ADD));
7932 OUTPUT:
7933     RETVAL
7934