1 /* -*-  c-basic-offset: 4 -*-
2  *
3  *  Fast store and retrieve mechanism.
4  *
5  *  Copyright (c) 1995-2000, Raphael Manfredi
6  *  Copyright (c) 2016, 2017 cPanel Inc
7  *  Copyright (c) 2017 Reini Urban
8  *
9  *  You may redistribute only under the same terms as Perl 5, as specified
10  *  in the README file that comes with the distribution.
11  *
12  */
13 
14 #define PERL_NO_GET_CONTEXT     /* we want efficiency */
15 #include <EXTERN.h>
16 #include <perl.h>
17 #include <XSUB.h>
18 
19 #ifndef PATCHLEVEL
20 #include <patchlevel.h>		/* Perl's one, needed since 5.6 */
21 #endif
22 
23 #if !defined(PERL_VERSION) || PERL_VERSION < 10 || (PERL_VERSION == 10 && PERL_SUBVERSION < 1)
24 #define NEED_PL_parser
25 #define NEED_sv_2pv_flags
26 #define NEED_load_module
27 #define NEED_vload_module
28 #define NEED_newCONSTSUB
29 #define NEED_newSVpvn_flags
30 #define NEED_newRV_noinc
31 #include "ppport.h"             /* handle old perls */
32 #endif
33 
34 #ifdef DEBUGGING
35 #define DEBUGME /* Debug mode, turns assertions on as well */
36 #define DASSERT /* Assertion mode */
37 #endif
38 
39 /*
40  * Pre PerlIO time when none of USE_PERLIO and PERLIO_IS_STDIO is defined
41  * Provide them with the necessary defines so they can build with pre-5.004.
42  */
43 #ifndef USE_PERLIO
44 #ifndef PERLIO_IS_STDIO
45 #define PerlIO FILE
46 #define PerlIO_getc(x) getc(x)
47 #define PerlIO_putc(f,x) putc(x,f)
48 #define PerlIO_read(x,y,z) fread(y,1,z,x)
49 #define PerlIO_write(x,y,z) fwrite(y,1,z,x)
50 #define PerlIO_stdoutf printf
51 #endif	/* PERLIO_IS_STDIO */
52 #endif	/* USE_PERLIO */
53 
54 /*
55  * Earlier versions of perl might be used, we can't assume they have the latest!
56  */
57 
58 #ifndef HvSHAREKEYS_off
59 #define HvSHAREKEYS_off(hv)	/* Ignore */
60 #endif
61 
62 /* perl <= 5.8.2 needs this */
63 #ifndef SvIsCOW
64 # define SvIsCOW(sv) 0
65 #endif
66 
67 #ifndef HvRITER_set
68 #  define HvRITER_set(hv,r)	(HvRITER(hv) = r)
69 #endif
70 #ifndef HvEITER_set
71 #  define HvEITER_set(hv,r)	(HvEITER(hv) = r)
72 #endif
73 
74 #ifndef HvRITER_get
75 #  define HvRITER_get           HvRITER
76 #endif
77 #ifndef HvEITER_get
78 #  define HvEITER_get           HvEITER
79 #endif
80 
81 #ifndef HvPLACEHOLDERS_get
82 #  define HvPLACEHOLDERS_get    HvPLACEHOLDERS
83 #endif
84 
85 #ifndef HvTOTALKEYS
86 #  define HvTOTALKEYS(hv)	HvKEYS(hv)
87 #endif
88 /* 5.6 */
89 #ifndef HvUSEDKEYS
90 #  define HvUSEDKEYS(hv)	HvKEYS(hv)
91 #endif
92 
93 #ifdef SVf_IsCOW
94 #  define SvTRULYREADONLY(sv)	SvREADONLY(sv)
95 #else
96 #  define SvTRULYREADONLY(sv)	(SvREADONLY(sv) && !SvIsCOW(sv))
97 #endif
98 
99 #ifndef SvPVCLEAR
100 #  define SvPVCLEAR(sv) sv_setpvs(sv, "")
101 #endif
102 
103 #ifndef strEQc
104 #  define strEQc(s,c) memEQ(s, ("" c ""), sizeof(c))
105 #endif
106 
107 #if defined(HAS_FLOCK) || defined(FCNTL_CAN_LOCK) && defined(HAS_LOCKF)
108 #define CAN_FLOCK &PL_sv_yes
109 #else
110 #define CAN_FLOCK &PL_sv_no
111 #endif
112 
113 #ifdef DEBUGME
114 
115 #ifndef DASSERT
116 #define DASSERT
117 #endif
118 
119 /*
120  * TRACEME() will only output things when the $Storable::DEBUGME is true,
121  * using the value traceme cached in the context.
122  *
123  *
124  * TRACEMED() directly looks at the variable, for use before traceme has been
125  * updated.
126  */
127 
128 #define TRACEME(x)                                            \
129     STMT_START {					      \
130         if (cxt->traceme)				      \
131             { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); }       \
132     } STMT_END
133 
134 #define TRACEMED(x)                                           \
135     STMT_START {                                              \
136         if (SvTRUE(get_sv("Storable::DEBUGME", GV_ADD)))      \
137             { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); }       \
138     } STMT_END
139 
140 #define INIT_TRACEME							\
141     STMT_START {							\
142 	cxt->traceme = SvTRUE(get_sv("Storable::DEBUGME", GV_ADD));	\
143     } STMT_END
144 
145 #else
146 #define TRACEME(x)
147 #define TRACEMED(x)
148 #define INIT_TRACEME
149 #endif	/* DEBUGME */
150 
151 #ifdef DASSERT
152 #define ASSERT(x,y)                                              \
153     STMT_START {                                                 \
154         if (!(x)) {                                              \
155             PerlIO_stdoutf("ASSERT FAILED (\"%s\", line %d): ",  \
156                            __FILE__, (int)__LINE__);             \
157             PerlIO_stdoutf y; PerlIO_stdoutf("\n");              \
158         }                                                        \
159     } STMT_END
160 #else
161 #define ASSERT(x,y)
162 #endif
163 
164 /*
165  * Type markers.
166  */
167 
168 #define C(x) ((char) (x))	/* For markers with dynamic retrieval handling */
169 
170 #define SX_OBJECT	C(0)	/* Already stored object */
171 #define SX_LSCALAR	C(1)	/* Scalar (large binary) follows (length, data) */
172 #define SX_ARRAY	C(2)	/* Array forthcoming (size, item list) */
173 #define SX_HASH		C(3)	/* Hash forthcoming (size, key/value pair list) */
174 #define SX_REF		C(4)	/* Reference to object forthcoming */
175 #define SX_UNDEF	C(5)	/* Undefined scalar */
176 #define SX_INTEGER	C(6)	/* Integer forthcoming */
177 #define SX_DOUBLE	C(7)	/* Double forthcoming */
178 #define SX_BYTE		C(8)	/* (signed) byte forthcoming */
179 #define SX_NETINT	C(9)	/* Integer in network order forthcoming */
180 #define SX_SCALAR	C(10)	/* Scalar (binary, small) follows (length, data) */
181 #define SX_TIED_ARRAY	C(11)	/* Tied array forthcoming */
182 #define SX_TIED_HASH	C(12)	/* Tied hash forthcoming */
183 #define SX_TIED_SCALAR	C(13)	/* Tied scalar forthcoming */
184 #define SX_SV_UNDEF	C(14)	/* Perl's immortal PL_sv_undef */
185 #define SX_SV_YES	C(15)	/* Perl's immortal PL_sv_yes */
186 #define SX_SV_NO	C(16)	/* Perl's immortal PL_sv_no */
187 #define SX_BLESS	C(17)	/* Object is blessed */
188 #define SX_IX_BLESS	C(18)	/* Object is blessed, classname given by index */
189 #define SX_HOOK		C(19)	/* Stored via hook, user-defined */
190 #define SX_OVERLOAD	C(20)	/* Overloaded reference */
191 #define SX_TIED_KEY	C(21)	/* Tied magic key forthcoming */
192 #define SX_TIED_IDX	C(22)	/* Tied magic index forthcoming */
193 #define SX_UTF8STR	C(23)	/* UTF-8 string forthcoming (small) */
194 #define SX_LUTF8STR	C(24)	/* UTF-8 string forthcoming (large) */
195 #define SX_FLAG_HASH	C(25)	/* Hash with flags forthcoming (size, flags, key/flags/value triplet list) */
196 #define SX_CODE         C(26)   /* Code references as perl source code */
197 #define SX_WEAKREF	C(27)	/* Weak reference to object forthcoming */
198 #define SX_WEAKOVERLOAD	C(28)	/* Overloaded weak reference */
199 #define SX_VSTRING	C(29)	/* vstring forthcoming (small) */
200 #define SX_LVSTRING	C(30)	/* vstring forthcoming (large) */
201 #define SX_SVUNDEF_ELEM	C(31)	/* array element set to &PL_sv_undef */
202 #define SX_REGEXP	C(32)	/* Regexp */
203 #define SX_LOBJECT	C(33)	/* Large object: string, array or hash (size >2G) */
204 #define SX_LAST		C(34)	/* invalid. marker only */
205 
206 /*
207  * Those are only used to retrieve "old" pre-0.6 binary images.
208  */
209 #define SX_ITEM		'i'	/* An array item introducer */
210 #define SX_IT_UNDEF	'I'	/* Undefined array item */
211 #define SX_KEY		'k'	/* A hash key introducer */
212 #define SX_VALUE	'v'	/* A hash value introducer */
213 #define SX_VL_UNDEF	'V'	/* Undefined hash value */
214 
215 /*
216  * Those are only used to retrieve "old" pre-0.7 binary images
217  */
218 
219 #define SX_CLASS	'b'	/* Object is blessed, class name length <255 */
220 #define SX_LG_CLASS	'B'	/* Object is blessed, class name length >255 */
221 #define SX_STORED	'X'	/* End of object */
222 
223 /*
224  * Limits between short/long length representation.
225  */
226 
227 #define LG_SCALAR	255	/* Large scalar length limit */
228 #define LG_BLESS	127	/* Large classname bless limit */
229 
230 /*
231  * Operation types
232  */
233 
234 #define ST_STORE	0x1	/* Store operation */
235 #define ST_RETRIEVE	0x2	/* Retrieval operation */
236 #define ST_CLONE	0x4	/* Deep cloning operation */
237 
238 /*
239  * The following structure is used for hash table key retrieval. Since, when
240  * retrieving objects, we'll be facing blessed hash references, it's best
241  * to pre-allocate that buffer once and resize it as the need arises, never
242  * freeing it (keys will be saved away someplace else anyway, so even large
243  * keys are not enough a motivation to reclaim that space).
244  *
245  * This structure is also used for memory store/retrieve operations which
246  * happen in a fixed place before being malloc'ed elsewhere if persistence
247  * is required. Hence the aptr pointer.
248  */
249 struct extendable {
250     char *arena;	/* Will hold hash key strings, resized as needed */
251     STRLEN asiz;	/* Size of aforementioned buffer */
252     char *aptr;		/* Arena pointer, for in-place read/write ops */
253     char *aend;		/* First invalid address */
254 };
255 
256 /*
257  * At store time:
258  * A hash table records the objects which have already been stored.
259  * Those are referred to as SX_OBJECT in the file, and their "tag" (i.e.
260  * an arbitrary sequence number) is used to identify them.
261  *
262  * At retrieve time:
263  * An array table records the objects which have already been retrieved,
264  * as seen by the tag determined by counting the objects themselves. The
265  * reference to that retrieved object is kept in the table, and is returned
266  * when an SX_OBJECT is found bearing that same tag.
267  *
268  * The same processing is used to record "classname" for blessed objects:
269  * indexing by a hash at store time, and via an array at retrieve time.
270  */
271 
272 typedef unsigned long stag_t;	/* Used by pre-0.6 binary format */
273 
274 /*
275  * Make the tag type 64-bit on 64-bit platforms.
276  *
277  * If the tag number is low enough it's stored as a 32-bit value, but
278  * with very large arrays and hashes it's possible to go over 2**32
279  * scalars.
280  */
281 
282 typedef STRLEN ntag_t;
283 
284 /* used for where_is_undef - marks an unset value */
285 #define UNSET_NTAG_T (~(ntag_t)0)
286 
287 /*
288  * The following "thread-safe" related defines were contributed by
289  * Murray Nesbitt <murray@activestate.com> and integrated by RAM, who
290  * only renamed things a little bit to ensure consistency with surrounding
291  * code.	-- RAM, 14/09/1999
292  *
293  * The original patch suffered from the fact that the stcxt_t structure
294  * was global.  Murray tried to minimize the impact on the code as much as
295  * possible.
296  *
297  * Starting with 0.7, Storable can be re-entrant, via the STORABLE_xxx hooks
298  * on objects.  Therefore, the notion of context needs to be generalized,
299  * threading or not.
300  */
301 
302 #define MY_VERSION "Storable(" XS_VERSION ")"
303 
304 
305 /*
306  * Conditional UTF8 support.
307  *
308  */
309 #ifdef SvUTF8_on
310 #define STORE_UTF8STR(pv, len)	STORE_PV_LEN(pv, len, SX_UTF8STR, SX_LUTF8STR)
311 #define HAS_UTF8_SCALARS
312 #ifdef HeKUTF8
313 #define HAS_UTF8_HASHES
314 #define HAS_UTF8_ALL
315 #else
316 /* 5.6 perl has utf8 scalars but not hashes */
317 #endif
318 #else
319 #define SvUTF8(sv) 0
320 #define STORE_UTF8STR(pv, len) CROAK(("panic: storing UTF8 in non-UTF8 perl"))
321 #endif
322 #ifndef HAS_UTF8_ALL
323 #define UTF8_CROAK() CROAK(("Cannot retrieve UTF8 data in non-UTF8 perl"))
324 #endif
325 #ifndef SvWEAKREF
326 #define WEAKREF_CROAK() CROAK(("Cannot retrieve weak references in this perl"))
327 #endif
328 #ifndef SvVOK
329 #define VSTRING_CROAK() CROAK(("Cannot retrieve vstring in this perl"))
330 #endif
331 
332 #ifdef HvPLACEHOLDERS
333 #define HAS_RESTRICTED_HASHES
334 #else
335 #define HVhek_PLACEHOLD	0x200
336 #define RESTRICTED_HASH_CROAK() CROAK(("Cannot retrieve restricted hash"))
337 #endif
338 
339 #ifdef HvHASKFLAGS
340 #define HAS_HASH_KEY_FLAGS
341 #endif
342 
343 #ifdef ptr_table_new
344 #define USE_PTR_TABLE
345 #endif
346 
347 /* do we need/want to clear padding on NVs? */
348 #if defined(LONG_DOUBLEKIND) && defined(USE_LONG_DOUBLE)
349 #  if LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN || \
350       LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN
351 #    define NV_PADDING (NVSIZE - 10)
352 #  else
353 #    define NV_PADDING 0
354 #  endif
355 #else
356 /* This is kind of a guess - it means we'll get an unneeded clear on 128-bit NV
357    but an upgraded perl will fix that
358 */
359 #  if NVSIZE > 8
360 #    define NV_CLEAR
361 #  endif
362 #  define NV_PADDING 0
363 #endif
364 
365 typedef union {
366     NV nv;
367     U8 bytes[sizeof(NV)];
368 } NV_bytes;
369 
370 /* Needed for 32bit with lengths > 2G - 4G, and 64bit */
371 #if PTRSIZE > 4
372 #define HAS_U64
373 #endif
374 
375 /*
376  * Fields s_tainted and s_dirty are prefixed with s_ because Perl's include
377  * files remap tainted and dirty when threading is enabled.  That's bad for
378  * perl to remap such common words.	-- RAM, 29/09/00
379  */
380 
381 struct stcxt;
382 typedef struct stcxt {
383     int entry;		/* flags recursion */
384     int optype;		/* type of traversal operation */
385     /* which objects have been seen, store time.
386        tags are numbers, which are cast to (SV *) and stored directly */
387 #ifdef USE_PTR_TABLE
388     /* use pseen if we have ptr_tables. We have to store tag+1, because
389        tag numbers start at 0, and we can't store (SV *) 0 in a ptr_table
390        without it being confused for a fetch lookup failure.  */
391     struct ptr_tbl *pseen;
392     /* Still need hseen for the 0.6 file format code. */
393 #endif
394     HV *hseen;
395     AV *hook_seen;		/* which SVs were returned by STORABLE_freeze() */
396     AV *aseen;			/* which objects have been seen, retrieve time */
397     ntag_t where_is_undef;		/* index in aseen of PL_sv_undef */
398     HV *hclass;			/* which classnames have been seen, store time */
399     AV *aclass;			/* which classnames have been seen, retrieve time */
400     HV *hook;			/* cache for hook methods per class name */
401     IV tagnum;			/* incremented at store time for each seen object */
402     IV classnum;		/* incremented at store time for each seen classname */
403     int netorder;		/* true if network order used */
404     int s_tainted;		/* true if input source is tainted, at retrieve time */
405     int forgive_me;		/* whether to be forgiving... */
406     int deparse;		/* whether to deparse code refs */
407     SV *eval;			/* whether to eval source code */
408     int canonical;		/* whether to store hashes sorted by key */
409 #ifndef HAS_RESTRICTED_HASHES
410     int derestrict;		/* whether to downgrade restricted hashes */
411 #endif
412 #ifndef HAS_UTF8_ALL
413     int use_bytes;		/* whether to bytes-ify utf8 */
414 #endif
415     int accept_future_minor;	/* croak immediately on future minor versions?  */
416     int s_dirty;		/* context is dirty due to CROAK() -- can be cleaned */
417     int membuf_ro;		/* true means membuf is read-only and msaved is rw */
418     struct extendable keybuf;	/* for hash key retrieval */
419     struct extendable membuf;	/* for memory store/retrieve operations */
420     struct extendable msaved;	/* where potentially valid mbuf is saved */
421     PerlIO *fio;		/* where I/O are performed, NULL for memory */
422     int ver_major;		/* major of version for retrieved object */
423     int ver_minor;		/* minor of version for retrieved object */
424     SV *(**retrieve_vtbl)(pTHX_ struct stcxt *, const char *);	/* retrieve dispatch table */
425     SV *prev;			/* contexts chained backwards in real recursion */
426     SV *my_sv;			/* the blessed scalar who's SvPVX() I am */
427 
428     /* recur_sv:
429 
430        A hashref of hashrefs or arrayref of arrayrefs is actually a
431        chain of four SVs, eg for an array ref containing an array ref:
432 
433          RV -> AV (element) -> RV -> AV
434 
435        To make this depth appear natural from a perl level we only
436        want to count this as two levels, so store_ref() stores it's RV
437        into recur_sv and store_array()/store_hash() will only count
438        that level if the AV/HV *isn't* recur_sv.
439 
440        We can't just have store_hash()/store_array() not count that
441        level, since it's possible for XS code to store an AV or HV
442        directly as an element (though perl code trying to access such
443        an object will generally croak.)
444      */
445     SV *recur_sv;               /* check only one recursive SV */
446     int in_retrieve_overloaded; /* performance hack for retrieving overloaded objects */
447     int flags;			/* controls whether to bless or tie objects */
448     IV recur_depth;        	/* avoid stack overflows RT #97526 */
449     IV max_recur_depth;        /* limit for recur_depth */
450     IV max_recur_depth_hash;   /* limit for recur_depth for hashes */
451 #ifdef DEBUGME
452     int traceme;                /* TRACEME() produces output */
453 #endif
454 } stcxt_t;
455 
456 #define RECURSION_TOO_DEEP() \
457     (cxt->max_recur_depth != -1 && ++cxt->recur_depth > cxt->max_recur_depth)
458 
459 /* There's cases where we need to check whether the hash recursion
460    limit has been reached without bumping the recursion levels, so the
461    hash check doesn't bump the depth.
462 */
463 #define RECURSION_TOO_DEEP_HASH() \
464     (cxt->max_recur_depth_hash != -1 && cxt->recur_depth > cxt->max_recur_depth_hash)
465 #define MAX_DEPTH_ERROR "Max. recursion depth with nested structures exceeded"
466 
467 static int storable_free(pTHX_ SV *sv, MAGIC* mg);
468 
469 static MGVTBL vtbl_storable = {
470     NULL, /* get */
471     NULL, /* set */
472     NULL, /* len */
473     NULL, /* clear */
474     storable_free,
475 #ifdef MGf_COPY
476     NULL, /* copy */
477 #endif
478 #ifdef MGf_DUP
479     NULL, /* dup */
480 #endif
481 #ifdef MGf_LOCAL
482     NULL /* local */
483 #endif
484 };
485 
486 /* From Digest::MD5.  */
487 #ifndef sv_magicext
488 # define sv_magicext(sv, obj, type, vtbl, name, namlen)         \
489     THX_sv_magicext(aTHX_ sv, obj, type, vtbl, name, namlen)
THX_sv_magicext(pTHX_ SV * sv,SV * obj,int type,MGVTBL const * vtbl,char const * name,I32 namlen)490 static MAGIC *THX_sv_magicext(pTHX_
491 	SV *sv, SV *obj, int type,
492 	MGVTBL const *vtbl, char const *name, I32 namlen)
493 {
494     MAGIC *mg;
495     if (obj || namlen)
496         /* exceeded intended usage of this reserve implementation */
497         return NULL;
498     Newxz(mg, 1, MAGIC);
499     mg->mg_virtual = (MGVTBL*)vtbl;
500     mg->mg_type = type;
501     mg->mg_ptr = (char *)name;
502     mg->mg_len = -1;
503     (void) SvUPGRADE(sv, SVt_PVMG);
504     mg->mg_moremagic = SvMAGIC(sv);
505     SvMAGIC_set(sv, mg);
506     SvMAGICAL_off(sv);
507     mg_magical(sv);
508     return mg;
509 }
510 #endif
511 
512 #define NEW_STORABLE_CXT_OBJ(cxt)				\
513     STMT_START {						\
514         SV *self = newSV(sizeof(stcxt_t) - 1);                  \
515         SV *my_sv = newRV_noinc(self);                          \
516         sv_magicext(self, NULL, PERL_MAGIC_ext, &vtbl_storable, NULL, 0); \
517         cxt = (stcxt_t *)SvPVX(self);                           \
518         Zero(cxt, 1, stcxt_t);                                  \
519         cxt->my_sv = my_sv;                                     \
520     } STMT_END
521 
522 #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || defined(PERL_CAPI)
523 
524 #if (PATCHLEVEL <= 4) && (SUBVERSION < 68)
525 #define dSTCXT_SV                                               \
526     SV *perinterp_sv = get_sv(MY_VERSION, 0)
527 #else	/* >= perl5.004_68 */
528 #define dSTCXT_SV						\
529     SV *perinterp_sv = *hv_fetch(PL_modglobal,                  \
530 				 MY_VERSION, sizeof(MY_VERSION)-1, TRUE)
531 #endif	/* < perl5.004_68 */
532 
533 #define dSTCXT_PTR(T,name)					\
534     T name = ((perinterp_sv                                     \
535                && SvIOK(perinterp_sv) && SvIVX(perinterp_sv)    \
536                ? (T)SvPVX(SvRV(INT2PTR(SV*,SvIVX(perinterp_sv)))) : (T) 0))
537 #define dSTCXT					\
538     dSTCXT_SV;                                  \
539     dSTCXT_PTR(stcxt_t *, cxt)
540 
541 #define INIT_STCXT					\
542     dSTCXT;                                             \
543     NEW_STORABLE_CXT_OBJ(cxt);                          \
544     assert(perinterp_sv);				\
545     sv_setiv(perinterp_sv, PTR2IV(cxt->my_sv))
546 
547 #define SET_STCXT(x)					\
548     STMT_START {					\
549         dSTCXT_SV;                                      \
550         sv_setiv(perinterp_sv, PTR2IV(x->my_sv));       \
551     } STMT_END
552 
553 #else /* !MULTIPLICITY && !PERL_OBJECT && !PERL_CAPI */
554 
555 static stcxt_t *Context_ptr = NULL;
556 #define dSTCXT			stcxt_t *cxt = Context_ptr
557 #define SET_STCXT(x)		Context_ptr = x
558 #define INIT_STCXT				\
559     dSTCXT;                                     \
560     NEW_STORABLE_CXT_OBJ(cxt);                  \
561     SET_STCXT(cxt)
562 
563 
564 #endif /* MULTIPLICITY || PERL_OBJECT || PERL_CAPI */
565 
566 /*
567  * KNOWN BUG:
568  *   Croaking implies a memory leak, since we don't use setjmp/longjmp
569  *   to catch the exit and free memory used during store or retrieve
570  *   operations.  This is not too difficult to fix, but I need to understand
571  *   how Perl does it, and croaking is exceptional anyway, so I lack the
572  *   motivation to do it.
573  *
574  * The current workaround is to mark the context as dirty when croaking,
575  * so that data structures can be freed whenever we renter Storable code
576  * (but only *then*: it's a workaround, not a fix).
577  *
578  * This is also imperfect, because we don't really know how far they trapped
579  * the croak(), and when we were recursing, we won't be able to clean anything
580  * but the topmost context stacked.
581  */
582 
583 #define CROAK(x)	STMT_START { cxt->s_dirty = 1; croak x; } STMT_END
584 
585 /*
586  * End of "thread-safe" related definitions.
587  */
588 
589 /*
590  * LOW_32BITS
591  *
592  * Keep only the low 32 bits of a pointer (used for tags, which are not
593  * really pointers).
594  */
595 
596 #if PTRSIZE <= 4
597 #define LOW_32BITS(x)	((I32) (x))
598 #else
599 #define LOW_32BITS(x)	((I32) ((STRLEN) (x) & 0xffffffffUL))
600 #endif
601 
602 /*
603  * PTR2TAG(x)
604  *
605  * Convert a pointer into an ntag_t.
606  */
607 
608 #define PTR2TAG(x) ((ntag_t)(x))
609 
610 #define TAG2PTR(x, type) ((y)(x))
611 
612 /*
613  * oI, oS, oC
614  *
615  * Hack for Crays, where sizeof(I32) == 8, and which are big-endians.
616  * Used in the WLEN and RLEN macros.
617  */
618 
619 #if INTSIZE > 4
620 #define oI(x)	((I32 *) ((char *) (x) + 4))
621 #define oS(x)	((x) - 4)
622 #define oL(x)	(x)
623 #define oC(x)	(x = 0)
624 #define CRAY_HACK
625 #else
626 #define oI(x)	(x)
627 #define oS(x)	(x)
628 #define oL(x)	(x)
629 #define oC(x)
630 #endif
631 
632 /*
633  * key buffer handling
634  */
635 #define kbuf	(cxt->keybuf).arena
636 #define ksiz	(cxt->keybuf).asiz
637 #define KBUFINIT()							\
638     STMT_START {							\
639         if (!kbuf) {                                                    \
640             TRACEME(("** allocating kbuf of 128 bytes"));               \
641             New(10003, kbuf, 128, char);                                \
642             ksiz = 128;                                                 \
643         }                                                               \
644     } STMT_END
645 #define KBUFCHK(x)							\
646     STMT_START {							\
647         if (x >= ksiz) {                                                \
648             if (x >= I32_MAX)                                           \
649                 CROAK(("Too large size > I32_MAX"));                    \
650             TRACEME(("** extending kbuf to %d bytes (had %d)",          \
651                      (int)(x+1), (int)ksiz));                           \
652             Renew(kbuf, x+1, char);                                     \
653             ksiz = x+1;                                                 \
654         }                                                               \
655     } STMT_END
656 
657 /*
658  * memory buffer handling
659  */
660 #define mbase	(cxt->membuf).arena
661 #define msiz	(cxt->membuf).asiz
662 #define mptr	(cxt->membuf).aptr
663 #define mend	(cxt->membuf).aend
664 
665 #define MGROW	(1 << 13)
666 #define MMASK	(MGROW - 1)
667 
668 #define round_mgrow(x)	\
669     ((STRLEN) (((STRLEN) (x) + MMASK) & ~MMASK))
670 #define trunc_int(x)	\
671     ((STRLEN) ((STRLEN) (x) & ~(sizeof(int)-1)))
672 #define int_aligned(x)	\
673     ((STRLEN)(x) == trunc_int(x))
674 
675 #define MBUF_INIT(x)							\
676     STMT_START {							\
677         if (!mbase) {                                                   \
678             TRACEME(("** allocating mbase of %d bytes", MGROW));        \
679             New(10003, mbase, (int)MGROW, char);                        \
680             msiz = (STRLEN)MGROW;                                       \
681         }                                                               \
682         mptr = mbase;                                                   \
683         if (x)                                                          \
684             mend = mbase + x;                                           \
685         else                                                            \
686             mend = mbase + msiz;                                        \
687     } STMT_END
688 
689 #define MBUF_TRUNC(x)	mptr = mbase + x
690 #define MBUF_SIZE()	(mptr - mbase)
691 
692 /*
693  * MBUF_SAVE_AND_LOAD
694  * MBUF_RESTORE
695  *
696  * Those macros are used in do_retrieve() to save the current memory
697  * buffer into cxt->msaved, before MBUF_LOAD() can be used to retrieve
698  * data from a string.
699  */
700 #define MBUF_SAVE_AND_LOAD(in)						\
701     STMT_START {							\
702         ASSERT(!cxt->membuf_ro, ("mbase not already saved"));           \
703         cxt->membuf_ro = 1;                                             \
704         TRACEME(("saving mbuf"));                                       \
705         StructCopy(&cxt->membuf, &cxt->msaved, struct extendable);      \
706         MBUF_LOAD(in);                                                  \
707     } STMT_END
708 
709 #define MBUF_RESTORE()							\
710     STMT_START {							\
711         ASSERT(cxt->membuf_ro, ("mbase is read-only"));                 \
712         cxt->membuf_ro = 0;                                             \
713         TRACEME(("restoring mbuf"));                                    \
714         StructCopy(&cxt->msaved, &cxt->membuf, struct extendable);      \
715     } STMT_END
716 
717 /*
718  * Use SvPOKp(), because SvPOK() fails on tainted scalars.
719  * See store_scalar() for other usage of this workaround.
720  */
721 #define MBUF_LOAD(v)						\
722     STMT_START {						\
723         ASSERT(cxt->membuf_ro, ("mbase is read-only"));         \
724         if (!SvPOKp(v))                                         \
725             CROAK(("Not a scalar string"));                     \
726         mptr = mbase = SvPV(v, msiz);                           \
727         mend = mbase + msiz;                                    \
728     } STMT_END
729 
730 #define MBUF_XTEND(x)						\
731     STMT_START {						\
732         STRLEN nsz = (STRLEN) round_mgrow((x)+msiz);            \
733         STRLEN offset = mptr - mbase;                           \
734         ASSERT(!cxt->membuf_ro, ("mbase is not read-only"));    \
735         TRACEME(("** extending mbase from %lu to %lu bytes (wants %lu new)", \
736                  (unsigned long)msiz, (unsigned long)nsz, (unsigned long)(x)));  \
737         Renew(mbase, nsz, char);                                \
738         msiz = nsz;                                             \
739         mptr = mbase + offset;                                  \
740         mend = mbase + nsz;                                     \
741     } STMT_END
742 
743 #define MBUF_CHK(x)				\
744     STMT_START {				\
745         if ((mptr + (x)) > mend)                \
746             MBUF_XTEND(x);                      \
747     } STMT_END
748 
749 #define MBUF_GETC(x)				\
750     STMT_START {				\
751         if (mptr < mend)                        \
752             x = (int) (unsigned char) *mptr++;  \
753         else                                    \
754             return (SV *) 0;                    \
755     } STMT_END
756 
757 #ifdef CRAY_HACK
758 #define MBUF_GETINT(x)				\
759     STMT_START {				\
760         oC(x);                                  \
761         if ((mptr + 4) <= mend) {               \
762             memcpy(oI(&x), mptr, 4);            \
763             mptr += 4;                          \
764         } else                                  \
765             return (SV *) 0;                    \
766     } STMT_END
767 #else
768 #define MBUF_GETINT(x)				\
769     STMT_START {				\
770         if ((mptr + sizeof(int)) <= mend) {     \
771             if (int_aligned(mptr))              \
772                 x = *(int *) mptr;              \
773             else                                \
774                 memcpy(&x, mptr, sizeof(int));  \
775             mptr += sizeof(int);                \
776         } else                                  \
777             return (SV *) 0;                    \
778     } STMT_END
779 #endif
780 
781 #define MBUF_READ(x,s)				\
782     STMT_START {				\
783         if ((mptr + (s)) <= mend) {             \
784             memcpy(x, mptr, s);                 \
785             mptr += s;                          \
786         } else                                  \
787             return (SV *) 0;                    \
788     } STMT_END
789 
790 #define MBUF_SAFEREAD(x,s,z)			\
791     STMT_START {				\
792         if ((mptr + (s)) <= mend) {             \
793             memcpy(x, mptr, s);                 \
794             mptr += s;                          \
795         } else {                                \
796             sv_free(z);                         \
797             return (SV *) 0;                    \
798         }                                       \
799     } STMT_END
800 
801 #define MBUF_SAFEPVREAD(x,s,z)			\
802     STMT_START {				\
803         if ((mptr + (s)) <= mend) {             \
804             memcpy(x, mptr, s);                 \
805             mptr += s;                          \
806         } else {                                \
807             Safefree(z);                        \
808             return (SV *) 0;                    \
809         }                                       \
810     } STMT_END
811 
812 #define MBUF_PUTC(c)				\
813     STMT_START {				\
814         if (mptr < mend)                        \
815             *mptr++ = (char) c;                 \
816         else {                                  \
817             MBUF_XTEND(1);                      \
818             *mptr++ = (char) c;                 \
819         }                                       \
820     } STMT_END
821 
822 #ifdef CRAY_HACK
823 #define MBUF_PUTINT(i)				\
824     STMT_START {				\
825         MBUF_CHK(4);                            \
826         memcpy(mptr, oI(&i), 4);                \
827         mptr += 4;                              \
828     } STMT_END
829 #else
830 #define MBUF_PUTINT(i) 				\
831     STMT_START {				\
832         MBUF_CHK(sizeof(int));                  \
833         if (int_aligned(mptr))                  \
834             *(int *) mptr = i;                  \
835         else                                    \
836             memcpy(mptr, &i, sizeof(int));      \
837         mptr += sizeof(int);                    \
838     } STMT_END
839 #endif
840 
841 #define MBUF_PUTLONG(l)				\
842     STMT_START {				\
843         MBUF_CHK(8);                            \
844         memcpy(mptr, &l, 8);                    \
845         mptr += 8;                              \
846     } STMT_END
847 #define MBUF_WRITE(x,s)				\
848     STMT_START {				\
849         MBUF_CHK(s);                            \
850         memcpy(mptr, x, s);                     \
851         mptr += s;                              \
852     } STMT_END
853 
854 /*
855  * Possible return values for sv_type().
856  */
857 
858 #define svis_REF		0
859 #define svis_SCALAR		1
860 #define svis_ARRAY		2
861 #define svis_HASH		3
862 #define svis_TIED		4
863 #define svis_TIED_ITEM		5
864 #define svis_CODE		6
865 #define svis_REGEXP		7
866 #define svis_OTHER		8
867 
868 /*
869  * Flags for SX_HOOK.
870  */
871 
872 #define SHF_TYPE_MASK		0x03
873 #define SHF_LARGE_CLASSLEN	0x04
874 #define SHF_LARGE_STRLEN	0x08
875 #define SHF_LARGE_LISTLEN	0x10
876 #define SHF_IDX_CLASSNAME	0x20
877 #define SHF_NEED_RECURSE	0x40
878 #define SHF_HAS_LIST		0x80
879 
880 /*
881  * Types for SX_HOOK (last 2 bits in flags).
882  */
883 
884 #define SHT_SCALAR		0
885 #define SHT_ARRAY		1
886 #define SHT_HASH		2
887 #define SHT_EXTRA		3	/* Read extra byte for type */
888 
889 /*
890  * The following are held in the "extra byte"...
891  */
892 
893 #define SHT_TSCALAR		4	/* 4 + 0 -- tied scalar */
894 #define SHT_TARRAY		5	/* 4 + 1 -- tied array */
895 #define SHT_THASH		6	/* 4 + 2 -- tied hash */
896 
897 /*
898  * per hash flags for flagged hashes
899  */
900 
901 #define SHV_RESTRICTED		0x01
902 
903 /*
904  * per key flags for flagged hashes
905  */
906 
907 #define SHV_K_UTF8		0x01
908 #define SHV_K_WASUTF8		0x02
909 #define SHV_K_LOCKED		0x04
910 #define SHV_K_ISSV		0x08
911 #define SHV_K_PLACEHOLDER	0x10
912 
913 /*
914  * flags to allow blessing and/or tieing data the data we load
915  */
916 #define FLAG_BLESS_OK 2
917 #define FLAG_TIE_OK   4
918 
919 /*
920  * Flags for SX_REGEXP.
921  */
922 
923 #define SHR_U32_RE_LEN		0x01
924 
925 /*
926  * Before 0.6, the magic string was "perl-store" (binary version number 0).
927  *
928  * Since 0.6 introduced many binary incompatibilities, the magic string has
929  * been changed to "pst0" to allow an old image to be properly retrieved by
930  * a newer Storable, but ensure a newer image cannot be retrieved with an
931  * older version.
932  *
933  * At 0.7, objects are given the ability to serialize themselves, and the
934  * set of markers is extended, backward compatibility is not jeopardized,
935  * so the binary version number could have remained unchanged.  To correctly
936  * spot errors if a file making use of 0.7-specific extensions is given to
937  * 0.6 for retrieval, the binary version was moved to "2".  And I'm introducing
938  * a "minor" version, to better track this kind of evolution from now on.
939  *
940  */
941 static const char old_magicstr[] = "perl-store"; /* Magic number before 0.6 */
942 static const char magicstr[] = "pst0";		 /* Used as a magic number */
943 
944 #define MAGICSTR_BYTES  'p','s','t','0'
945 #define OLDMAGICSTR_BYTES  'p','e','r','l','-','s','t','o','r','e'
946 
947 /* 5.6.x introduced the ability to have IVs as long long.
948    However, Configure still defined BYTEORDER based on the size of a long.
949    Storable uses the BYTEORDER value as part of the header, but doesn't
950    explicitly store sizeof(IV) anywhere in the header.  Hence on 5.6.x built
951    with IV as long long on a platform that uses Configure (ie most things
952    except VMS and Windows) headers are identical for the different IV sizes,
953    despite the files containing some fields based on sizeof(IV)
954    Erk. Broken-ness.
955    5.8 is consistent - the following redefinition kludge is only needed on
956    5.6.x, but the interwork is needed on 5.8 while data survives in files
957    with the 5.6 header.
958 
959 */
960 
961 #if defined (IVSIZE) && (IVSIZE == 8) && (LONGSIZE == 4)
962 #ifndef NO_56_INTERWORK_KLUDGE
963 #define USE_56_INTERWORK_KLUDGE
964 #endif
965 #if BYTEORDER == 0x1234
966 #undef BYTEORDER
967 #define BYTEORDER 0x12345678
968 #else
969 #if BYTEORDER == 0x4321
970 #undef BYTEORDER
971 #define BYTEORDER 0x87654321
972 #endif
973 #endif
974 #endif
975 
976 #if BYTEORDER == 0x1234
977 #define BYTEORDER_BYTES  '1','2','3','4'
978 #else
979 #if BYTEORDER == 0x12345678
980 #define BYTEORDER_BYTES  '1','2','3','4','5','6','7','8'
981 #ifdef USE_56_INTERWORK_KLUDGE
982 #define BYTEORDER_BYTES_56  '1','2','3','4'
983 #endif
984 #else
985 #if BYTEORDER == 0x87654321
986 #define BYTEORDER_BYTES  '8','7','6','5','4','3','2','1'
987 #ifdef USE_56_INTERWORK_KLUDGE
988 #define BYTEORDER_BYTES_56  '4','3','2','1'
989 #endif
990 #else
991 #if BYTEORDER == 0x4321
992 #define BYTEORDER_BYTES  '4','3','2','1'
993 #else
994 #error Unknown byteorder. Please append your byteorder to Storable.xs
995 #endif
996 #endif
997 #endif
998 #endif
999 
1000 #ifndef INT32_MAX
1001 # define INT32_MAX 2147483647
1002 #endif
1003 #if IVSIZE > 4 && !defined(INT64_MAX)
1004 # define INT64_MAX 9223372036854775807LL
1005 #endif
1006 
1007 static const char byteorderstr[] = {BYTEORDER_BYTES, 0};
1008 #ifdef USE_56_INTERWORK_KLUDGE
1009 static const char byteorderstr_56[] = {BYTEORDER_BYTES_56, 0};
1010 #endif
1011 
1012 #define STORABLE_BIN_MAJOR	2		/* Binary major "version" */
1013 #define STORABLE_BIN_MINOR	11		/* Binary minor "version" */
1014 
1015 #if (PATCHLEVEL <= 5)
1016 #define STORABLE_BIN_WRITE_MINOR	4
1017 #elif !defined (SvVOK)
1018 /*
1019  * Perl 5.6.0-5.8.0 can do weak references, but not vstring magic.
1020 */
1021 #define STORABLE_BIN_WRITE_MINOR	8
1022 #elif PATCHLEVEL >= 19
1023 /* Perl 5.19 takes away the special meaning of PL_sv_undef in arrays. */
1024 /* With 3.x we added LOBJECT */
1025 #define STORABLE_BIN_WRITE_MINOR	11
1026 #else
1027 #define STORABLE_BIN_WRITE_MINOR	9
1028 #endif /* (PATCHLEVEL <= 5) */
1029 
1030 #if (PATCHLEVEL < 8 || (PATCHLEVEL == 8 && SUBVERSION < 1))
1031 #define PL_sv_placeholder PL_sv_undef
1032 #endif
1033 
1034 /*
1035  * Useful store shortcuts...
1036  */
1037 
1038 /*
1039  * Note that if you put more than one mark for storing a particular
1040  * type of thing, *and* in the retrieve_foo() function you mark both
1041  * the thingy's you get off with SEEN(), you *must* increase the
1042  * tagnum with cxt->tagnum++ along with this macro!
1043  *     - samv 20Jan04
1044  */
1045 #define PUTMARK(x) 					\
1046     STMT_START {					\
1047         if (!cxt->fio)                                  \
1048             MBUF_PUTC(x);                               \
1049         else if (PerlIO_putc(cxt->fio, x) == EOF)       \
1050             return -1;                                  \
1051     } STMT_END
1052 
1053 #define WRITE_I32(x)						\
1054     STMT_START {						\
1055         ASSERT(sizeof(x) == sizeof(I32), ("writing an I32"));   \
1056         if (!cxt->fio)                                          \
1057             MBUF_PUTINT(x);                                     \
1058         else if (PerlIO_write(cxt->fio, oI(&x),                 \
1059                               oS(sizeof(x))) != oS(sizeof(x)))  \
1060             return -1;                                          \
1061     } STMT_END
1062 
1063 #define WRITE_U64(x)							\
1064     STMT_START {							\
1065         ASSERT(sizeof(x) == sizeof(UV), ("writing an UV"));		\
1066         if (!cxt->fio)                                                  \
1067             MBUF_PUTLONG(x);                                            \
1068         else if (PerlIO_write(cxt->fio, oL(&x),                         \
1069                               oS(sizeof(x))) != oS(sizeof(x)))          \
1070             return -1;                                                  \
1071     } STMT_END
1072 
1073 #ifdef HAS_HTONL
1074 #define WLEN(x)                                                         \
1075     STMT_START {							\
1076         ASSERT(sizeof(x) == sizeof(int), ("WLEN writing an int"));      \
1077         if (cxt->netorder) {                                            \
1078             int y = (int) htonl(x);                                     \
1079             if (!cxt->fio)                                              \
1080                 MBUF_PUTINT(y);                                         \
1081             else if (PerlIO_write(cxt->fio,oI(&y),oS(sizeof(y))) != oS(sizeof(y))) \
1082                 return -1;                                              \
1083         } else {                                                        \
1084             if (!cxt->fio)                                              \
1085                 MBUF_PUTINT(x);                                         \
1086             else if (PerlIO_write(cxt->fio,oI(&x),                      \
1087                                   oS(sizeof(x))) != oS(sizeof(x)))      \
1088                 return -1;                                              \
1089         }                                                               \
1090     } STMT_END
1091 
1092 #  ifdef HAS_U64
1093 
1094 #define W64LEN(x)							\
1095     STMT_START {							\
1096         ASSERT(sizeof(x) == 8, ("W64LEN writing a U64"));               \
1097         if (cxt->netorder) {                                            \
1098             U32 buf[2];      						\
1099             buf[1] = htonl(x & 0xffffffffUL);                           \
1100             buf[0] = htonl(x >> 32);                                    \
1101             if (!cxt->fio)                                              \
1102                 MBUF_PUTLONG(buf);                                      \
1103             else if (PerlIO_write(cxt->fio, buf,                        \
1104                                   sizeof(buf)) != sizeof(buf))          \
1105                 return -1;                                              \
1106         } else {                                                        \
1107             if (!cxt->fio)                                              \
1108                 MBUF_PUTLONG(x);                                        \
1109             else if (PerlIO_write(cxt->fio,oI(&x),                      \
1110                                   oS(sizeof(x))) != oS(sizeof(x)))      \
1111                 return -1;                                              \
1112         }                                                               \
1113     } STMT_END
1114 
1115 #  else
1116 
1117 #define W64LEN(x) CROAK(("No 64bit UVs"))
1118 
1119 #  endif
1120 
1121 #else
1122 #define WLEN(x)	WRITE_I32(x)
1123 #ifdef HAS_U64
1124 #define W64LEN(x) WRITE_U64(x)
1125 #else
1126 #define W64LEN(x) CROAK(("no 64bit UVs"))
1127 #endif
1128 #endif
1129 
1130 #define WRITE(x,y) 							\
1131     STMT_START {							\
1132         if (!cxt->fio)                                                  \
1133             MBUF_WRITE(x,y);                                            \
1134         else if (PerlIO_write(cxt->fio, x, y) != (SSize_t)y)            \
1135             return -1;                                                  \
1136     } STMT_END
1137 
1138 #define STORE_PV_LEN(pv, len, small, large)			\
1139     STMT_START {						\
1140         if (len <= LG_SCALAR) {                                 \
1141             int ilen = (int) len;                               \
1142             unsigned char clen = (unsigned char) len;           \
1143             PUTMARK(small);                                     \
1144             PUTMARK(clen);                                      \
1145             if (len)                                            \
1146                 WRITE(pv, ilen);                                \
1147         } else if (sizeof(len) > 4 && len > INT32_MAX) {        \
1148             PUTMARK(SX_LOBJECT);                                \
1149             PUTMARK(large);                                     \
1150             W64LEN(len);                                        \
1151             WRITE(pv, len);                                     \
1152         } else {                                                \
1153             int ilen = (int) len;                               \
1154             PUTMARK(large);                                     \
1155             WLEN(ilen);                                         \
1156             WRITE(pv, ilen);                                    \
1157         }                                                       \
1158     } STMT_END
1159 
1160 #define STORE_SCALAR(pv, len)	STORE_PV_LEN(pv, len, SX_SCALAR, SX_LSCALAR)
1161 
1162 /*
1163  * Store &PL_sv_undef in arrays without recursing through store().  We
1164  * actually use this to represent nonexistent elements, for historical
1165  * reasons.
1166  */
1167 #define STORE_SV_UNDEF() 					\
1168     STMT_START {                                                \
1169 	cxt->tagnum++;						\
1170 	PUTMARK(SX_SV_UNDEF);					\
1171     } STMT_END
1172 
1173 /*
1174  * Useful retrieve shortcuts...
1175  */
1176 
1177 #define GETCHAR() \
1178     (cxt->fio ? PerlIO_getc(cxt->fio)                   \
1179               : (mptr >= mend ? EOF : (int) *mptr++))
1180 
1181 #define GETMARK(x)							\
1182     STMT_START {							\
1183         if (!cxt->fio)                                                  \
1184             MBUF_GETC(x);                                               \
1185         else if ((int) (x = PerlIO_getc(cxt->fio)) == EOF)              \
1186             return (SV *) 0;                                            \
1187     } STMT_END
1188 
1189 #define READ_I32(x)							\
1190     STMT_START {							\
1191         ASSERT(sizeof(x) == sizeof(I32), ("reading an I32"));           \
1192         oC(x);                                                          \
1193         if (!cxt->fio)                                                  \
1194             MBUF_GETINT(x);                                             \
1195         else if (PerlIO_read(cxt->fio, oI(&x),                          \
1196                                  oS(sizeof(x))) != oS(sizeof(x)))       \
1197             return (SV *) 0;                                            \
1198     } STMT_END
1199 
1200 #ifdef HAS_NTOHL
1201 #define RLEN(x)                                                         \
1202     STMT_START {							\
1203         oC(x);                                                          \
1204         if (!cxt->fio)                                                  \
1205             MBUF_GETINT(x);                                             \
1206         else if (PerlIO_read(cxt->fio, oI(&x),                          \
1207                                  oS(sizeof(x))) != oS(sizeof(x)))       \
1208             return (SV *) 0;                                            \
1209         if (cxt->netorder)                                              \
1210             x = (int) ntohl(x);                                         \
1211     } STMT_END
1212 #else
1213 #define RLEN(x) READ_I32(x)
1214 #endif
1215 
1216 #define READ(x,y) 							\
1217     STMT_START {							\
1218 	if (!cxt->fio)							\
1219             MBUF_READ(x, y);                                            \
1220 	else if (PerlIO_read(cxt->fio, x, y) != (SSize_t)y)             \
1221             return (SV *) 0;                                            \
1222     } STMT_END
1223 
1224 #define SAFEREAD(x,y,z)                                                 \
1225     STMT_START {							\
1226         if (!cxt->fio)                                                  \
1227             MBUF_SAFEREAD(x,y,z);                                       \
1228         else if (PerlIO_read(cxt->fio, x, y) != (SSize_t)y) {           \
1229             sv_free(z);                                                 \
1230             return (SV *) 0;                                            \
1231         }                                                               \
1232     } STMT_END
1233 
1234 #define SAFEPVREAD(x,y,z)					\
1235     STMT_START {						\
1236         if (!cxt->fio)                                          \
1237             MBUF_SAFEPVREAD(x,y,z);                             \
1238         else if (PerlIO_read(cxt->fio, x, y) != y) {            \
1239             Safefree(z);                                        \
1240             return (SV *) 0;                                    \
1241         }                                                       \
1242     } STMT_END
1243 
1244 #ifdef HAS_U64
1245 
1246 #  if defined(HAS_NTOHL)
1247 #    define Sntohl(x) ntohl(x)
1248 #  elif BYTEORDER == 0x87654321 || BYTEORDER == 0x4321
1249 #    define Sntohl(x) (x)
1250 #  else
Sntohl(U32 x)1251 static U32 Sntohl(U32 x) {
1252     return ((x & 0xFF) << 24) + ((x * 0xFF00) << 8)
1253 	+ ((x & 0xFF0000) >> 8) + ((x & 0xFF000000) >> 24);
1254 }
1255 #  endif
1256 
1257 #  define READ_U64(x)                                                       \
1258     STMT_START {                                                          \
1259 	ASSERT(sizeof(x) == 8, ("R64LEN reading a U64"));                 \
1260 	if (cxt->netorder) {                                              \
1261 	    U32 buf[2];                                                   \
1262 	    READ((void *)buf, sizeof(buf));                               \
1263 	    (x) = ((UV)Sntohl(buf[0]) << 32) + Sntohl(buf[1]);		\
1264 	}                                                                 \
1265 	else {                                                            \
1266 	    READ(&(x), sizeof(x));                                        \
1267 	}                                                                 \
1268     } STMT_END
1269 
1270 #endif
1271 
1272 /*
1273  * SEEN() is used at retrieve time, to remember where object 'y', bearing a
1274  * given tag 'tagnum', has been retrieved. Next time we see an SX_OBJECT marker,
1275  * we'll therefore know where it has been retrieved and will be able to
1276  * share the same reference, as in the original stored memory image.
1277  *
1278  * We also need to bless objects ASAP for hooks (which may compute "ref $x"
1279  * on the objects given to STORABLE_thaw and expect that to be defined), and
1280  * also for overloaded objects (for which we might not find the stash if the
1281  * object is not blessed yet--this might occur for overloaded objects that
1282  * refer to themselves indirectly: if we blessed upon return from a sub
1283  * retrieve(), the SX_OBJECT marker we'd found could not have overloading
1284  * restored on it because the underlying object would not be blessed yet!).
1285  *
1286  * To achieve that, the class name of the last retrieved object is passed down
1287  * recursively, and the first SEEN() call for which the class name is not NULL
1288  * will bless the object.
1289  *
1290  * i should be true iff sv is immortal (ie PL_sv_yes, PL_sv_no or PL_sv_undef)
1291  *
1292  * SEEN0() is a short-cut where stash is always NULL.
1293  *
1294  * The _NN variants dont check for y being null
1295  */
1296 #define SEEN0_NN(y,i)							\
1297     STMT_START {							\
1298         if (av_store(cxt->aseen, cxt->tagnum++, i ? (SV*)(y)            \
1299                      : SvREFCNT_inc(y)) == 0)                           \
1300             return (SV *) 0;                                            \
1301         TRACEME(("aseen(#%d) = 0x%" UVxf " (refcnt=%d)",                \
1302                  (int)cxt->tagnum-1,                                    \
1303                  PTR2UV(y), (int)SvREFCNT(y)-1));                       \
1304     } STMT_END
1305 
1306 #define SEEN0(y,i)							\
1307     STMT_START {							\
1308         if (!y)                                                         \
1309             return (SV *) 0;                                            \
1310         SEEN0_NN(y,i);                                                  \
1311     } STMT_END
1312 
1313 #define SEEN_NN(y,stash,i)						\
1314     STMT_START {							\
1315         SEEN0_NN(y,i);                                                  \
1316         if (stash)							\
1317             BLESS((SV *)(y), (HV *)(stash));                            \
1318     } STMT_END
1319 
1320 #define SEEN(y,stash,i)							\
1321     STMT_START {                                                	\
1322         if (!y)                                                         \
1323             return (SV *) 0;                                            \
1324         SEEN_NN(y,stash, i);                                            \
1325     } STMT_END
1326 
1327 /*
1328  * Bless 's' in 'p', via a temporary reference, required by sv_bless().
1329  * "A" magic is added before the sv_bless for overloaded classes, this avoids
1330  * an expensive call to S_reset_amagic in sv_bless.
1331  */
1332 #define BLESS(s,stash)							\
1333     STMT_START {							\
1334         SV *ref;                                                        \
1335         if (cxt->flags & FLAG_BLESS_OK) {                               \
1336             TRACEME(("blessing 0x%" UVxf " in %s", PTR2UV(s),           \
1337                      HvNAME_get(stash)));                               \
1338             ref = newRV_noinc(s);                                       \
1339             if (cxt->in_retrieve_overloaded && Gv_AMG(stash)) {         \
1340                 cxt->in_retrieve_overloaded = 0;                        \
1341                 SvAMAGIC_on(ref);                                       \
1342             }                                                           \
1343             (void) sv_bless(ref, stash);                                \
1344             SvRV_set(ref, NULL);                                        \
1345             SvREFCNT_dec(ref);                                          \
1346         }                                                               \
1347         else {                                                          \
1348             TRACEME(("not blessing 0x%" UVxf " in %s", PTR2UV(s),       \
1349                      (HvNAME_get(stash))));                             \
1350         }                                                               \
1351     } STMT_END
1352 /*
1353  * sort (used in store_hash) - conditionally use qsort when
1354  * sortsv is not available ( <= 5.6.1 ).
1355  */
1356 
1357 #if (PATCHLEVEL <= 6)
1358 
1359 #if defined(USE_ITHREADS)
1360 
1361 #define STORE_HASH_SORT						\
1362     ENTER; {                                                    \
1363         PerlInterpreter *orig_perl = PERL_GET_CONTEXT;          \
1364         SAVESPTR(orig_perl);                                    \
1365         PERL_SET_CONTEXT(aTHX);                                 \
1366         qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp);\
1367     } LEAVE;
1368 
1369 #else /* ! USE_ITHREADS */
1370 
1371 #define STORE_HASH_SORT					\
1372     qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp);
1373 
1374 #endif  /* USE_ITHREADS */
1375 
1376 #else /* PATCHLEVEL > 6 */
1377 
1378 #define STORE_HASH_SORT \
1379     sortsv(AvARRAY(av), len, Perl_sv_cmp);
1380 
1381 #endif /* PATCHLEVEL <= 6 */
1382 
1383 static int store(pTHX_ stcxt_t *cxt, SV *sv);
1384 static SV *retrieve(pTHX_ stcxt_t *cxt, const char *cname);
1385 
1386 #define UNSEE()			\
1387     STMT_START {			\
1388         av_pop(cxt->aseen);             \
1389         cxt->tagnum--;                  \
1390     } STMT_END
1391 
1392 /*
1393  * Dynamic dispatching table for SV store.
1394  */
1395 
1396 static int store_ref(pTHX_ stcxt_t *cxt, SV *sv);
1397 static int store_scalar(pTHX_ stcxt_t *cxt, SV *sv);
1398 static int store_array(pTHX_ stcxt_t *cxt, AV *av);
1399 static int store_hash(pTHX_ stcxt_t *cxt, HV *hv);
1400 static int store_tied(pTHX_ stcxt_t *cxt, SV *sv);
1401 static int store_tied_item(pTHX_ stcxt_t *cxt, SV *sv);
1402 static int store_code(pTHX_ stcxt_t *cxt, CV *cv);
1403 static int store_regexp(pTHX_ stcxt_t *cxt, SV *sv);
1404 static int store_other(pTHX_ stcxt_t *cxt, SV *sv);
1405 static int store_blessed(pTHX_ stcxt_t *cxt, SV *sv, int type, HV *pkg);
1406 
1407 typedef int (*sv_store_t)(pTHX_ stcxt_t *cxt, SV *sv);
1408 
1409 static const sv_store_t sv_store[] = {
1410     (sv_store_t)store_ref,	/* svis_REF */
1411     (sv_store_t)store_scalar,	/* svis_SCALAR */
1412     (sv_store_t)store_array,	/* svis_ARRAY */
1413     (sv_store_t)store_hash,	/* svis_HASH */
1414     (sv_store_t)store_tied,	/* svis_TIED */
1415     (sv_store_t)store_tied_item,/* svis_TIED_ITEM */
1416     (sv_store_t)store_code,	/* svis_CODE */
1417     (sv_store_t)store_regexp,	/* svis_REGEXP */
1418     (sv_store_t)store_other,	/* svis_OTHER */
1419 };
1420 
1421 #define SV_STORE(x)	(*sv_store[x])
1422 
1423 /*
1424  * Dynamic dispatching tables for SV retrieval.
1425  */
1426 
1427 static SV *retrieve_lscalar(pTHX_ stcxt_t *cxt, const char *cname);
1428 static SV *retrieve_lutf8str(pTHX_ stcxt_t *cxt, const char *cname);
1429 static SV *old_retrieve_array(pTHX_ stcxt_t *cxt, const char *cname);
1430 static SV *old_retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname);
1431 static SV *retrieve_ref(pTHX_ stcxt_t *cxt, const char *cname);
1432 static SV *retrieve_undef(pTHX_ stcxt_t *cxt, const char *cname);
1433 static SV *retrieve_integer(pTHX_ stcxt_t *cxt, const char *cname);
1434 static SV *retrieve_double(pTHX_ stcxt_t *cxt, const char *cname);
1435 static SV *retrieve_byte(pTHX_ stcxt_t *cxt, const char *cname);
1436 static SV *retrieve_netint(pTHX_ stcxt_t *cxt, const char *cname);
1437 static SV *retrieve_scalar(pTHX_ stcxt_t *cxt, const char *cname);
1438 static SV *retrieve_utf8str(pTHX_ stcxt_t *cxt, const char *cname);
1439 static SV *retrieve_tied_array(pTHX_ stcxt_t *cxt, const char *cname);
1440 static SV *retrieve_tied_hash(pTHX_ stcxt_t *cxt, const char *cname);
1441 static SV *retrieve_tied_scalar(pTHX_ stcxt_t *cxt, const char *cname);
1442 static SV *retrieve_other(pTHX_ stcxt_t *cxt, const char *cname);
1443 static SV *retrieve_lobject(pTHX_ stcxt_t *cxt, const char *cname);
1444 static SV *retrieve_regexp(pTHX_ stcxt_t *cxt, const char *cname);
1445 
1446 /* helpers for U64 lobjects */
1447 
1448 static SV *get_lstring(pTHX_ stcxt_t *cxt, UV len, int isutf8, const char *cname);
1449 #ifdef HAS_U64
1450 static SV *get_larray(pTHX_ stcxt_t *cxt, UV len, const char *cname);
1451 static SV *get_lhash(pTHX_ stcxt_t *cxt, UV len, int hash_flags, const char *cname);
1452 static int store_lhash(pTHX_ stcxt_t *cxt, HV *hv, unsigned char hash_flags);
1453 #endif
1454 static int store_hentry(pTHX_ stcxt_t *cxt, HV* hv, UV i, HE *he, unsigned char hash_flags);
1455 
1456 typedef SV* (*sv_retrieve_t)(pTHX_ stcxt_t *cxt, const char *name);
1457 
1458 static const sv_retrieve_t sv_old_retrieve[] = {
1459     0,					/* SX_OBJECT -- entry unused dynamically */
1460     (sv_retrieve_t)retrieve_lscalar,	/* SX_LSCALAR */
1461     (sv_retrieve_t)old_retrieve_array,	/* SX_ARRAY -- for pre-0.6 binaries */
1462     (sv_retrieve_t)old_retrieve_hash,	/* SX_HASH -- for pre-0.6 binaries */
1463     (sv_retrieve_t)retrieve_ref,	/* SX_REF */
1464     (sv_retrieve_t)retrieve_undef,	/* SX_UNDEF */
1465     (sv_retrieve_t)retrieve_integer,	/* SX_INTEGER */
1466     (sv_retrieve_t)retrieve_double,	/* SX_DOUBLE */
1467     (sv_retrieve_t)retrieve_byte,	/* SX_BYTE */
1468     (sv_retrieve_t)retrieve_netint,	/* SX_NETINT */
1469     (sv_retrieve_t)retrieve_scalar,	/* SX_SCALAR */
1470     (sv_retrieve_t)retrieve_tied_array,	/* SX_TIED_ARRAY */
1471     (sv_retrieve_t)retrieve_tied_hash,	/* SX_TIED_HASH */
1472     (sv_retrieve_t)retrieve_tied_scalar,/* SX_TIED_SCALAR */
1473     (sv_retrieve_t)retrieve_other,	/* SX_SV_UNDEF not supported */
1474     (sv_retrieve_t)retrieve_other,	/* SX_SV_YES not supported */
1475     (sv_retrieve_t)retrieve_other,	/* SX_SV_NO not supported */
1476     (sv_retrieve_t)retrieve_other,	/* SX_BLESS not supported */
1477     (sv_retrieve_t)retrieve_other,	/* SX_IX_BLESS not supported */
1478     (sv_retrieve_t)retrieve_other,	/* SX_HOOK not supported */
1479     (sv_retrieve_t)retrieve_other,	/* SX_OVERLOADED not supported */
1480     (sv_retrieve_t)retrieve_other,	/* SX_TIED_KEY not supported */
1481     (sv_retrieve_t)retrieve_other,	/* SX_TIED_IDX not supported */
1482     (sv_retrieve_t)retrieve_other,	/* SX_UTF8STR not supported */
1483     (sv_retrieve_t)retrieve_other,	/* SX_LUTF8STR not supported */
1484     (sv_retrieve_t)retrieve_other,	/* SX_FLAG_HASH not supported */
1485     (sv_retrieve_t)retrieve_other,	/* SX_CODE not supported */
1486     (sv_retrieve_t)retrieve_other,	/* SX_WEAKREF not supported */
1487     (sv_retrieve_t)retrieve_other,	/* SX_WEAKOVERLOAD not supported */
1488     (sv_retrieve_t)retrieve_other,	/* SX_VSTRING not supported */
1489     (sv_retrieve_t)retrieve_other,	/* SX_LVSTRING not supported */
1490     (sv_retrieve_t)retrieve_other,	/* SX_SVUNDEF_ELEM not supported */
1491     (sv_retrieve_t)retrieve_other,	/* SX_REGEXP */
1492     (sv_retrieve_t)retrieve_other,  	/* SX_LOBJECT not supported */
1493     (sv_retrieve_t)retrieve_other,  	/* SX_LAST */
1494 };
1495 
1496 static SV *retrieve_hook_common(pTHX_ stcxt_t *cxt, const char *cname, int large);
1497 
1498 static SV *retrieve_array(pTHX_ stcxt_t *cxt, const char *cname);
1499 static SV *retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname);
1500 static SV *retrieve_sv_undef(pTHX_ stcxt_t *cxt, const char *cname);
1501 static SV *retrieve_sv_yes(pTHX_ stcxt_t *cxt, const char *cname);
1502 static SV *retrieve_sv_no(pTHX_ stcxt_t *cxt, const char *cname);
1503 static SV *retrieve_blessed(pTHX_ stcxt_t *cxt, const char *cname);
1504 static SV *retrieve_idx_blessed(pTHX_ stcxt_t *cxt, const char *cname);
1505 static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname);
1506 static SV *retrieve_overloaded(pTHX_ stcxt_t *cxt, const char *cname);
1507 static SV *retrieve_tied_key(pTHX_ stcxt_t *cxt, const char *cname);
1508 static SV *retrieve_tied_idx(pTHX_ stcxt_t *cxt, const char *cname);
1509 static SV *retrieve_flag_hash(pTHX_ stcxt_t *cxt, const char *cname);
1510 static SV *retrieve_code(pTHX_ stcxt_t *cxt, const char *cname);
1511 static SV *retrieve_weakref(pTHX_ stcxt_t *cxt, const char *cname);
1512 static SV *retrieve_weakoverloaded(pTHX_ stcxt_t *cxt, const char *cname);
1513 static SV *retrieve_vstring(pTHX_ stcxt_t *cxt, const char *cname);
1514 static SV *retrieve_lvstring(pTHX_ stcxt_t *cxt, const char *cname);
1515 static SV *retrieve_svundef_elem(pTHX_ stcxt_t *cxt, const char *cname);
1516 
1517 static const sv_retrieve_t sv_retrieve[] = {
1518     0,					/* SX_OBJECT -- entry unused dynamically */
1519     (sv_retrieve_t)retrieve_lscalar,	/* SX_LSCALAR */
1520     (sv_retrieve_t)retrieve_array,	/* SX_ARRAY */
1521     (sv_retrieve_t)retrieve_hash,	/* SX_HASH */
1522     (sv_retrieve_t)retrieve_ref,	/* SX_REF */
1523     (sv_retrieve_t)retrieve_undef,	/* SX_UNDEF */
1524     (sv_retrieve_t)retrieve_integer,	/* SX_INTEGER */
1525     (sv_retrieve_t)retrieve_double,	/* SX_DOUBLE */
1526     (sv_retrieve_t)retrieve_byte,	/* SX_BYTE */
1527     (sv_retrieve_t)retrieve_netint,	/* SX_NETINT */
1528     (sv_retrieve_t)retrieve_scalar,	/* SX_SCALAR */
1529     (sv_retrieve_t)retrieve_tied_array,	/* SX_TIED_ARRAY */
1530     (sv_retrieve_t)retrieve_tied_hash,	/* SX_TIED_HASH */
1531     (sv_retrieve_t)retrieve_tied_scalar,/* SX_TIED_SCALAR */
1532     (sv_retrieve_t)retrieve_sv_undef,	/* SX_SV_UNDEF */
1533     (sv_retrieve_t)retrieve_sv_yes,	/* SX_SV_YES */
1534     (sv_retrieve_t)retrieve_sv_no,	/* SX_SV_NO */
1535     (sv_retrieve_t)retrieve_blessed,	/* SX_BLESS */
1536     (sv_retrieve_t)retrieve_idx_blessed,/* SX_IX_BLESS */
1537     (sv_retrieve_t)retrieve_hook,	/* SX_HOOK */
1538     (sv_retrieve_t)retrieve_overloaded,	/* SX_OVERLOAD */
1539     (sv_retrieve_t)retrieve_tied_key,	/* SX_TIED_KEY */
1540     (sv_retrieve_t)retrieve_tied_idx,	/* SX_TIED_IDX */
1541     (sv_retrieve_t)retrieve_utf8str,	/* SX_UTF8STR  */
1542     (sv_retrieve_t)retrieve_lutf8str,	/* SX_LUTF8STR */
1543     (sv_retrieve_t)retrieve_flag_hash,	/* SX_HASH */
1544     (sv_retrieve_t)retrieve_code,	/* SX_CODE */
1545     (sv_retrieve_t)retrieve_weakref,	/* SX_WEAKREF */
1546     (sv_retrieve_t)retrieve_weakoverloaded,/* SX_WEAKOVERLOAD */
1547     (sv_retrieve_t)retrieve_vstring,	/* SX_VSTRING */
1548     (sv_retrieve_t)retrieve_lvstring,	/* SX_LVSTRING */
1549     (sv_retrieve_t)retrieve_svundef_elem,/* SX_SVUNDEF_ELEM */
1550     (sv_retrieve_t)retrieve_regexp,	/* SX_REGEXP */
1551     (sv_retrieve_t)retrieve_lobject,	/* SX_LOBJECT */
1552     (sv_retrieve_t)retrieve_other,  	/* SX_LAST */
1553 };
1554 
1555 #define RETRIEVE(c,x) ((x) >= SX_LAST ? retrieve_other : *(c)->retrieve_vtbl[x])
1556 
1557 static SV *mbuf2sv(pTHX);
1558 
1559 /***
1560  *** Context management.
1561  ***/
1562 
1563 /*
1564  * init_perinterp
1565  *
1566  * Called once per "thread" (interpreter) to initialize some global context.
1567  */
init_perinterp(pTHX)1568 static void init_perinterp(pTHX)
1569 {
1570     INIT_STCXT;
1571     INIT_TRACEME;
1572     cxt->netorder = 0;		/* true if network order used */
1573     cxt->forgive_me = -1;	/* whether to be forgiving... */
1574     cxt->accept_future_minor = -1; /* would otherwise occur too late */
1575 }
1576 
1577 /*
1578  * reset_context
1579  *
1580  * Called at the end of every context cleaning, to perform common reset
1581  * operations.
1582  */
reset_context(stcxt_t * cxt)1583 static void reset_context(stcxt_t *cxt)
1584 {
1585     cxt->entry = 0;
1586     cxt->s_dirty = 0;
1587     cxt->recur_sv = NULL;
1588     cxt->recur_depth = 0;
1589     cxt->optype &= ~(ST_STORE|ST_RETRIEVE);	/* Leave ST_CLONE alone */
1590 }
1591 
1592 /*
1593  * init_store_context
1594  *
1595  * Initialize a new store context for real recursion.
1596  */
init_store_context(pTHX_ stcxt_t * cxt,PerlIO * f,int optype,int network_order)1597 static void init_store_context(pTHX_
1598 	stcxt_t *cxt,
1599         PerlIO *f,
1600         int optype,
1601         int network_order)
1602 {
1603     INIT_TRACEME;
1604 
1605     TRACEME(("init_store_context"));
1606 
1607     cxt->netorder = network_order;
1608     cxt->forgive_me = -1;		/* Fetched from perl if needed */
1609     cxt->deparse = -1;			/* Idem */
1610     cxt->eval = NULL;			/* Idem */
1611     cxt->canonical = -1;		/* Idem */
1612     cxt->tagnum = -1;			/* Reset tag numbers */
1613     cxt->classnum = -1;			/* Reset class numbers */
1614     cxt->fio = f;			/* Where I/O are performed */
1615     cxt->optype = optype;		/* A store, or a deep clone */
1616     cxt->entry = 1;			/* No recursion yet */
1617 
1618     /*
1619      * The 'hseen' table is used to keep track of each SV stored and their
1620      * associated tag numbers is special. It is "abused" because the
1621      * values stored are not real SV, just integers cast to (SV *),
1622      * which explains the freeing below.
1623      *
1624      * It is also one possible bottleneck to achieve good storing speed,
1625      * so the "shared keys" optimization is turned off (unlikely to be
1626      * of any use here), and the hash table is "pre-extended". Together,
1627      * those optimizations increase the throughput by 12%.
1628      */
1629 
1630 #ifdef USE_PTR_TABLE
1631     cxt->pseen = ptr_table_new();
1632     cxt->hseen = 0;
1633 #else
1634     cxt->hseen = newHV();	/* Table where seen objects are stored */
1635     HvSHAREKEYS_off(cxt->hseen);
1636 #endif
1637     /*
1638      * The following does not work well with perl5.004_04, and causes
1639      * a core dump later on, in a completely unrelated spot, which
1640      * makes me think there is a memory corruption going on.
1641      *
1642      * Calling hv_ksplit(hseen, HBUCKETS) instead of manually hacking
1643      * it below does not make any difference. It seems to work fine
1644      * with perl5.004_68 but given the probable nature of the bug,
1645      * that does not prove anything.
1646      *
1647      * It's a shame because increasing the amount of buckets raises
1648      * store() throughput by 5%, but until I figure this out, I can't
1649      * allow for this to go into production.
1650      *
1651      * It is reported fixed in 5.005, hence the #if.
1652      */
1653 #if PERL_VERSION >= 5
1654 #define HBUCKETS	4096		/* Buckets for %hseen */
1655 #ifndef USE_PTR_TABLE
1656     HvMAX(cxt->hseen) = HBUCKETS - 1;	/* keys %hseen = $HBUCKETS; */
1657 #endif
1658 #endif
1659 
1660     /*
1661      * The 'hclass' hash uses the same settings as 'hseen' above, but it is
1662      * used to assign sequential tags (numbers) to class names for blessed
1663      * objects.
1664      *
1665      * We turn the shared key optimization on.
1666      */
1667 
1668     cxt->hclass = newHV();		/* Where seen classnames are stored */
1669 
1670 #if PERL_VERSION >= 5
1671     HvMAX(cxt->hclass) = HBUCKETS - 1;	/* keys %hclass = $HBUCKETS; */
1672 #endif
1673 
1674     /*
1675      * The 'hook' hash table is used to keep track of the references on
1676      * the STORABLE_freeze hook routines, when found in some class name.
1677      *
1678      * It is assumed that the inheritance tree will not be changed during
1679      * storing, and that no new method will be dynamically created by the
1680      * hooks.
1681      */
1682 
1683     cxt->hook = newHV();		/* Table where hooks are cached */
1684 
1685     /*
1686      * The 'hook_seen' array keeps track of all the SVs returned by
1687      * STORABLE_freeze hooks for us to serialize, so that they are not
1688      * reclaimed until the end of the serialization process.  Each SV is
1689      * only stored once, the first time it is seen.
1690      */
1691 
1692     cxt->hook_seen = newAV(); /* Lists SVs returned by STORABLE_freeze */
1693 
1694     cxt->max_recur_depth = SvIV(get_sv("Storable::recursion_limit", GV_ADD));
1695     cxt->max_recur_depth_hash = SvIV(get_sv("Storable::recursion_limit_hash", GV_ADD));
1696 }
1697 
1698 /*
1699  * clean_store_context
1700  *
1701  * Clean store context by
1702  */
clean_store_context(pTHX_ stcxt_t * cxt)1703 static void clean_store_context(pTHX_ stcxt_t *cxt)
1704 {
1705     HE *he;
1706 
1707     TRACEMED(("clean_store_context"));
1708 
1709     ASSERT(cxt->optype & ST_STORE, ("was performing a store()"));
1710 
1711     /*
1712      * Insert real values into hashes where we stored faked pointers.
1713      */
1714 
1715 #ifndef USE_PTR_TABLE
1716     if (cxt->hseen) {
1717         hv_iterinit(cxt->hseen);
1718         while ((he = hv_iternext(cxt->hseen)))	/* Extra () for -Wall */
1719             HeVAL(he) = &PL_sv_undef;
1720     }
1721 #endif
1722 
1723     if (cxt->hclass) {
1724         hv_iterinit(cxt->hclass);
1725         while ((he = hv_iternext(cxt->hclass))) /* Extra () for -Wall */
1726             HeVAL(he) = &PL_sv_undef;
1727     }
1728 
1729     /*
1730      * And now dispose of them...
1731      *
1732      * The surrounding if() protection has been added because there might be
1733      * some cases where this routine is called more than once, during
1734      * exceptional events.  This was reported by Marc Lehmann when Storable
1735      * is executed from mod_perl, and the fix was suggested by him.
1736      * 		-- RAM, 20/12/2000
1737      */
1738 
1739 #ifdef USE_PTR_TABLE
1740     if (cxt->pseen) {
1741         struct ptr_tbl *pseen = cxt->pseen;
1742         cxt->pseen = 0;
1743         ptr_table_free(pseen);
1744     }
1745     assert(!cxt->hseen);
1746 #else
1747     if (cxt->hseen) {
1748         HV *hseen = cxt->hseen;
1749         cxt->hseen = 0;
1750         hv_undef(hseen);
1751         sv_free((SV *) hseen);
1752     }
1753 #endif
1754 
1755     if (cxt->hclass) {
1756         HV *hclass = cxt->hclass;
1757         cxt->hclass = 0;
1758         hv_undef(hclass);
1759         sv_free((SV *) hclass);
1760     }
1761 
1762     if (cxt->hook) {
1763         HV *hook = cxt->hook;
1764         cxt->hook = 0;
1765         hv_undef(hook);
1766         sv_free((SV *) hook);
1767     }
1768 
1769     if (cxt->hook_seen) {
1770         AV *hook_seen = cxt->hook_seen;
1771         cxt->hook_seen = 0;
1772         av_undef(hook_seen);
1773         sv_free((SV *) hook_seen);
1774     }
1775 
1776     cxt->forgive_me = -1;	/* Fetched from perl if needed */
1777     cxt->deparse = -1;		/* Idem */
1778     if (cxt->eval) {
1779         SvREFCNT_dec(cxt->eval);
1780     }
1781     cxt->eval = NULL;		/* Idem */
1782     cxt->canonical = -1;	/* Idem */
1783 
1784     reset_context(cxt);
1785 }
1786 
1787 /*
1788  * init_retrieve_context
1789  *
1790  * Initialize a new retrieve context for real recursion.
1791  */
init_retrieve_context(pTHX_ stcxt_t * cxt,int optype,int is_tainted)1792 static void init_retrieve_context(pTHX_
1793 	stcxt_t *cxt, int optype, int is_tainted)
1794 {
1795     INIT_TRACEME;
1796 
1797     TRACEME(("init_retrieve_context"));
1798 
1799     /*
1800      * The hook hash table is used to keep track of the references on
1801      * the STORABLE_thaw hook routines, when found in some class name.
1802      *
1803      * It is assumed that the inheritance tree will not be changed during
1804      * storing, and that no new method will be dynamically created by the
1805      * hooks.
1806      */
1807 
1808     cxt->hook  = newHV();			/* Caches STORABLE_thaw */
1809 
1810 #ifdef USE_PTR_TABLE
1811     cxt->pseen = 0;
1812 #endif
1813 
1814     /*
1815      * If retrieving an old binary version, the cxt->retrieve_vtbl variable
1816      * was set to sv_old_retrieve. We'll need a hash table to keep track of
1817      * the correspondence between the tags and the tag number used by the
1818      * new retrieve routines.
1819      */
1820 
1821     cxt->hseen = (((void*)cxt->retrieve_vtbl == (void*)sv_old_retrieve)
1822                   ? newHV() : 0);
1823 
1824     cxt->aseen = newAV();	/* Where retrieved objects are kept */
1825     cxt->where_is_undef = UNSET_NTAG_T;	/* Special case for PL_sv_undef */
1826     cxt->aclass = newAV();	/* Where seen classnames are kept */
1827     cxt->tagnum = 0;		/* Have to count objects... */
1828     cxt->classnum = 0;		/* ...and class names as well */
1829     cxt->optype = optype;
1830     cxt->s_tainted = is_tainted;
1831     cxt->entry = 1;		/* No recursion yet */
1832 #ifndef HAS_RESTRICTED_HASHES
1833     cxt->derestrict = -1;	/* Fetched from perl if needed */
1834 #endif
1835 #ifndef HAS_UTF8_ALL
1836     cxt->use_bytes = -1;	/* Fetched from perl if needed */
1837 #endif
1838     cxt->accept_future_minor = -1;/* Fetched from perl if needed */
1839     cxt->in_retrieve_overloaded = 0;
1840 
1841     cxt->max_recur_depth = SvIV(get_sv("Storable::recursion_limit", GV_ADD));
1842     cxt->max_recur_depth_hash = SvIV(get_sv("Storable::recursion_limit_hash", GV_ADD));
1843 }
1844 
1845 /*
1846  * clean_retrieve_context
1847  *
1848  * Clean retrieve context by
1849  */
clean_retrieve_context(pTHX_ stcxt_t * cxt)1850 static void clean_retrieve_context(pTHX_ stcxt_t *cxt)
1851 {
1852     TRACEMED(("clean_retrieve_context"));
1853 
1854     ASSERT(cxt->optype & ST_RETRIEVE, ("was performing a retrieve()"));
1855 
1856     if (cxt->aseen) {
1857         AV *aseen = cxt->aseen;
1858         cxt->aseen = 0;
1859         av_undef(aseen);
1860         sv_free((SV *) aseen);
1861     }
1862     cxt->where_is_undef = UNSET_NTAG_T;
1863 
1864     if (cxt->aclass) {
1865         AV *aclass = cxt->aclass;
1866         cxt->aclass = 0;
1867         av_undef(aclass);
1868         sv_free((SV *) aclass);
1869     }
1870 
1871     if (cxt->hook) {
1872         HV *hook = cxt->hook;
1873         cxt->hook = 0;
1874         hv_undef(hook);
1875         sv_free((SV *) hook);
1876     }
1877 
1878     if (cxt->hseen) {
1879         HV *hseen = cxt->hseen;
1880         cxt->hseen = 0;
1881         hv_undef(hseen);
1882         sv_free((SV *) hseen);	/* optional HV, for backward compat. */
1883     }
1884 
1885 #ifndef HAS_RESTRICTED_HASHES
1886     cxt->derestrict = -1;		/* Fetched from perl if needed */
1887 #endif
1888 #ifndef HAS_UTF8_ALL
1889     cxt->use_bytes = -1;		/* Fetched from perl if needed */
1890 #endif
1891     cxt->accept_future_minor = -1;	/* Fetched from perl if needed */
1892 
1893     cxt->in_retrieve_overloaded = 0;
1894     reset_context(cxt);
1895 }
1896 
1897 /*
1898  * clean_context
1899  *
1900  * A workaround for the CROAK bug: cleanup the last context.
1901  */
clean_context(pTHX_ stcxt_t * cxt)1902 static void clean_context(pTHX_ stcxt_t *cxt)
1903 {
1904     TRACEMED(("clean_context"));
1905 
1906     ASSERT(cxt->s_dirty, ("dirty context"));
1907 
1908     if (cxt->membuf_ro)
1909         MBUF_RESTORE();
1910 
1911     ASSERT(!cxt->membuf_ro, ("mbase is not read-only"));
1912 
1913     if (cxt->optype & ST_RETRIEVE)
1914         clean_retrieve_context(aTHX_ cxt);
1915     else if (cxt->optype & ST_STORE)
1916         clean_store_context(aTHX_ cxt);
1917     else
1918         reset_context(cxt);
1919 
1920     ASSERT(!cxt->s_dirty, ("context is clean"));
1921     ASSERT(cxt->entry == 0, ("context is reset"));
1922 }
1923 
1924 /*
1925  * allocate_context
1926  *
1927  * Allocate a new context and push it on top of the parent one.
1928  * This new context is made globally visible via SET_STCXT().
1929  */
allocate_context(pTHX_ stcxt_t * parent_cxt)1930 static stcxt_t *allocate_context(pTHX_ stcxt_t *parent_cxt)
1931 {
1932     stcxt_t *cxt;
1933 
1934     ASSERT(!parent_cxt->s_dirty, ("parent context clean"));
1935 
1936     NEW_STORABLE_CXT_OBJ(cxt);
1937     TRACEMED(("allocate_context"));
1938 
1939     cxt->prev = parent_cxt->my_sv;
1940     SET_STCXT(cxt);
1941 
1942     ASSERT(!cxt->s_dirty, ("clean context"));
1943 
1944     return cxt;
1945 }
1946 
1947 /*
1948  * free_context
1949  *
1950  * Free current context, which cannot be the "root" one.
1951  * Make the context underneath globally visible via SET_STCXT().
1952  */
free_context(pTHX_ stcxt_t * cxt)1953 static void free_context(pTHX_ stcxt_t *cxt)
1954 {
1955     stcxt_t *prev = (stcxt_t *)(cxt->prev ? SvPVX(SvRV(cxt->prev)) : 0);
1956 
1957     TRACEMED(("free_context"));
1958 
1959     ASSERT(!cxt->s_dirty, ("clean context"));
1960     ASSERT(prev, ("not freeing root context"));
1961     assert(prev);
1962 
1963     SvREFCNT_dec(cxt->my_sv);
1964     SET_STCXT(prev);
1965 
1966     ASSERT(cxt, ("context not void"));
1967 }
1968 
1969 /***
1970  *** Predicates.
1971  ***/
1972 
1973 /* these two functions are currently only used within asserts */
1974 #ifdef DASSERT
1975 /*
1976  * is_storing
1977  *
1978  * Tells whether we're in the middle of a store operation.
1979  */
is_storing(pTHX)1980 static int is_storing(pTHX)
1981 {
1982     dSTCXT;
1983 
1984     return cxt->entry && (cxt->optype & ST_STORE);
1985 }
1986 
1987 /*
1988  * is_retrieving
1989  *
1990  * Tells whether we're in the middle of a retrieve operation.
1991  */
is_retrieving(pTHX)1992 static int is_retrieving(pTHX)
1993 {
1994     dSTCXT;
1995 
1996     return cxt->entry && (cxt->optype & ST_RETRIEVE);
1997 }
1998 #endif
1999 
2000 /*
2001  * last_op_in_netorder
2002  *
2003  * Returns whether last operation was made using network order.
2004  *
2005  * This is typically out-of-band information that might prove useful
2006  * to people wishing to convert native to network order data when used.
2007  */
last_op_in_netorder(pTHX)2008 static int last_op_in_netorder(pTHX)
2009 {
2010     dSTCXT;
2011 
2012     assert(cxt);
2013     return cxt->netorder;
2014 }
2015 
2016 /***
2017  *** Hook lookup and calling routines.
2018  ***/
2019 
2020 /*
2021  * pkg_fetchmeth
2022  *
2023  * A wrapper on gv_fetchmethod_autoload() which caches results.
2024  *
2025  * Returns the routine reference as an SV*, or null if neither the package
2026  * nor its ancestors know about the method.
2027  */
pkg_fetchmeth(pTHX_ HV * cache,HV * pkg,const char * method)2028 static SV *pkg_fetchmeth(pTHX_
2029 	HV *cache,
2030 	HV *pkg,
2031 	const char *method)
2032 {
2033     GV *gv;
2034     SV *sv;
2035     const char *hvname = HvNAME_get(pkg);
2036 #ifdef DEBUGME
2037     dSTCXT;
2038 #endif
2039 
2040     /*
2041      * The following code is the same as the one performed by UNIVERSAL::can
2042      * in the Perl core.
2043      */
2044 
2045     gv = gv_fetchmethod_autoload(pkg, method, FALSE);
2046     if (gv && isGV(gv)) {
2047         sv = newRV_inc((SV*) GvCV(gv));
2048         TRACEME(("%s->%s: 0x%" UVxf, hvname, method, PTR2UV(sv)));
2049     } else {
2050         sv = newSVsv(&PL_sv_undef);
2051         TRACEME(("%s->%s: not found", hvname, method));
2052     }
2053 
2054     /*
2055      * Cache the result, ignoring failure: if we can't store the value,
2056      * it just won't be cached.
2057      */
2058 
2059     (void) hv_store(cache, hvname, strlen(hvname), sv, 0);
2060 
2061     return SvOK(sv) ? sv : (SV *) 0;
2062 }
2063 
2064 /*
2065  * pkg_hide
2066  *
2067  * Force cached value to be undef: hook ignored even if present.
2068  */
pkg_hide(pTHX_ HV * cache,HV * pkg,const char * method)2069 static void pkg_hide(pTHX_
2070 	HV *cache,
2071 	HV *pkg,
2072 	const char *method)
2073 {
2074     const char *hvname = HvNAME_get(pkg);
2075     PERL_UNUSED_ARG(method);
2076     (void) hv_store(cache,
2077                     hvname, strlen(hvname), newSVsv(&PL_sv_undef), 0);
2078 }
2079 
2080 /*
2081  * pkg_uncache
2082  *
2083  * Discard cached value: a whole fetch loop will be retried at next lookup.
2084  */
pkg_uncache(pTHX_ HV * cache,HV * pkg,const char * method)2085 static void pkg_uncache(pTHX_
2086 	HV *cache,
2087 	HV *pkg,
2088 	const char *method)
2089 {
2090     const char *hvname = HvNAME_get(pkg);
2091     PERL_UNUSED_ARG(method);
2092     (void) hv_delete(cache, hvname, strlen(hvname), G_DISCARD);
2093 }
2094 
2095 /*
2096  * pkg_can
2097  *
2098  * Our own "UNIVERSAL::can", which caches results.
2099  *
2100  * Returns the routine reference as an SV*, or null if the object does not
2101  * know about the method.
2102  */
pkg_can(pTHX_ HV * cache,HV * pkg,const char * method)2103 static SV *pkg_can(pTHX_
2104 	HV *cache,
2105 	HV *pkg,
2106 	const char *method)
2107 {
2108     SV **svh;
2109     SV *sv;
2110     const char *hvname = HvNAME_get(pkg);
2111 #ifdef DEBUGME
2112     dSTCXT;
2113 #endif
2114 
2115     TRACEME(("pkg_can for %s->%s", hvname, method));
2116 
2117     /*
2118      * Look into the cache to see whether we already have determined
2119      * where the routine was, if any.
2120      *
2121      * NOTA BENE: we don't use 'method' at all in our lookup, since we know
2122      * that only one hook (i.e. always the same) is cached in a given cache.
2123      */
2124 
2125     svh = hv_fetch(cache, hvname, strlen(hvname), FALSE);
2126     if (svh) {
2127         sv = *svh;
2128         if (!SvOK(sv)) {
2129             TRACEME(("cached %s->%s: not found", hvname, method));
2130             return (SV *) 0;
2131         } else {
2132             TRACEME(("cached %s->%s: 0x%" UVxf,
2133                      hvname, method, PTR2UV(sv)));
2134             return sv;
2135         }
2136     }
2137 
2138     TRACEME(("not cached yet"));
2139     return pkg_fetchmeth(aTHX_ cache, pkg, method);	/* Fetch and cache */
2140 }
2141 
2142 /*
2143  * scalar_call
2144  *
2145  * Call routine as obj->hook(av) in scalar context.
2146  * Propagates the single returned value if not called in void context.
2147  */
scalar_call(pTHX_ SV * obj,SV * hook,int cloning,AV * av,I32 flags)2148 static SV *scalar_call(pTHX_
2149 	SV *obj,
2150 	SV *hook,
2151 	int cloning,
2152 	AV *av,
2153 	I32 flags)
2154 {
2155     dSP;
2156     int count;
2157     SV *sv = 0;
2158 #ifdef DEBUGME
2159     dSTCXT;
2160 #endif
2161 
2162     TRACEME(("scalar_call (cloning=%d)", cloning));
2163 
2164     ENTER;
2165     SAVETMPS;
2166 
2167     PUSHMARK(sp);
2168     XPUSHs(obj);
2169     XPUSHs(sv_2mortal(newSViv(cloning)));		/* Cloning flag */
2170     if (av) {
2171         SV **ary = AvARRAY(av);
2172         SSize_t cnt = AvFILLp(av) + 1;
2173         SSize_t i;
2174         XPUSHs(ary[0]);					/* Frozen string */
2175         for (i = 1; i < cnt; i++) {
2176             TRACEME(("pushing arg #%d (0x%" UVxf ")...",
2177                      (int)i, PTR2UV(ary[i])));
2178             XPUSHs(sv_2mortal(newRV_inc(ary[i])));
2179         }
2180     }
2181     PUTBACK;
2182 
2183     TRACEME(("calling..."));
2184     count = call_sv(hook, flags);	/* Go back to Perl code */
2185     TRACEME(("count = %d", count));
2186 
2187     SPAGAIN;
2188 
2189     if (count) {
2190         sv = POPs;
2191         SvREFCNT_inc(sv); /* We're returning it, must stay alive! */
2192     }
2193 
2194     PUTBACK;
2195     FREETMPS;
2196     LEAVE;
2197 
2198     return sv;
2199 }
2200 
2201 /*
2202  * array_call
2203  *
2204  * Call routine obj->hook(cloning) in list context.
2205  * Returns the list of returned values in an array.
2206  */
array_call(pTHX_ SV * obj,SV * hook,int cloning)2207 static AV *array_call(pTHX_
2208 	SV *obj,
2209 	SV *hook,
2210 	int cloning)
2211 {
2212     dSP;
2213     int count;
2214     AV *av;
2215     int i;
2216 #ifdef DEBUGME
2217     dSTCXT;
2218 #endif
2219 
2220     TRACEME(("array_call (cloning=%d)", cloning));
2221 
2222     ENTER;
2223     SAVETMPS;
2224 
2225     PUSHMARK(sp);
2226     XPUSHs(obj);				/* Target object */
2227     XPUSHs(sv_2mortal(newSViv(cloning)));	/* Cloning flag */
2228     PUTBACK;
2229 
2230     count = call_sv(hook, G_ARRAY);	/* Go back to Perl code */
2231 
2232     SPAGAIN;
2233 
2234     av = newAV();
2235     for (i = count - 1; i >= 0; i--) {
2236         SV *sv = POPs;
2237         av_store(av, i, SvREFCNT_inc(sv));
2238     }
2239 
2240     PUTBACK;
2241     FREETMPS;
2242     LEAVE;
2243 
2244     return av;
2245 }
2246 
2247 #if PERL_VERSION < 15
2248 static void
cleanup_recursive_av(pTHX_ AV * av)2249 cleanup_recursive_av(pTHX_ AV* av) {
2250     SSize_t i = AvFILLp(av);
2251     SV** arr = AvARRAY(av);
2252     if (SvMAGICAL(av)) return;
2253     while (i >= 0) {
2254         if (arr[i]) {
2255 #if PERL_VERSION < 14
2256             arr[i] = NULL;
2257 #else
2258             SvREFCNT_dec(arr[i]);
2259 #endif
2260         }
2261         i--;
2262     }
2263 }
2264 
2265 #ifndef SvREFCNT_IMMORTAL
2266 #ifdef DEBUGGING
2267    /* exercise the immortal resurrection code in sv_free2() */
2268 #  define SvREFCNT_IMMORTAL 1000
2269 #else
2270 #  define SvREFCNT_IMMORTAL ((~(U32)0)/2)
2271 #endif
2272 #endif
2273 
2274 static void
cleanup_recursive_hv(pTHX_ HV * hv)2275 cleanup_recursive_hv(pTHX_ HV* hv) {
2276     SSize_t i = HvTOTALKEYS(hv);
2277     HE** arr = HvARRAY(hv);
2278     if (SvMAGICAL(hv)) return;
2279     while (i >= 0) {
2280         if (arr[i]) {
2281             SvREFCNT(HeVAL(arr[i])) = SvREFCNT_IMMORTAL;
2282             arr[i] = NULL; /* let it leak. too dangerous to clean it up here */
2283         }
2284         i--;
2285     }
2286 #if PERL_VERSION < 8
2287     ((XPVHV*)SvANY(hv))->xhv_array = NULL;
2288 #else
2289     HvARRAY(hv) = NULL;
2290 #endif
2291     HvTOTALKEYS(hv) = 0;
2292 }
2293 static void
cleanup_recursive_rv(pTHX_ SV * sv)2294 cleanup_recursive_rv(pTHX_ SV* sv) {
2295     if (sv && SvROK(sv))
2296         SvREFCNT_dec(SvRV(sv));
2297 }
2298 static void
cleanup_recursive_data(pTHX_ SV * sv)2299 cleanup_recursive_data(pTHX_ SV* sv) {
2300     if (SvTYPE(sv) == SVt_PVAV) {
2301         cleanup_recursive_av(aTHX_ (AV*)sv);
2302     }
2303     else if (SvTYPE(sv) == SVt_PVHV) {
2304         cleanup_recursive_hv(aTHX_ (HV*)sv);
2305     }
2306     else {
2307         cleanup_recursive_rv(aTHX_ sv);
2308     }
2309 }
2310 #endif
2311 
2312 /*
2313  * known_class
2314  *
2315  * Lookup the class name in the 'hclass' table and either assign it a new ID
2316  * or return the existing one, by filling in 'classnum'.
2317  *
2318  * Return true if the class was known, false if the ID was just generated.
2319  */
known_class(pTHX_ stcxt_t * cxt,char * name,int len,I32 * classnum)2320 static int known_class(pTHX_
2321 	stcxt_t *cxt,
2322 	char *name,		/* Class name */
2323 	int len,		/* Name length */
2324 	I32 *classnum)
2325 {
2326     SV **svh;
2327     HV *hclass = cxt->hclass;
2328 
2329     TRACEME(("known_class (%s)", name));
2330 
2331     /*
2332      * Recall that we don't store pointers in this hash table, but tags.
2333      * Therefore, we need LOW_32BITS() to extract the relevant parts.
2334      */
2335 
2336     svh = hv_fetch(hclass, name, len, FALSE);
2337     if (svh) {
2338         *classnum = LOW_32BITS(*svh);
2339         return TRUE;
2340     }
2341 
2342     /*
2343      * Unknown classname, we need to record it.
2344      */
2345 
2346     cxt->classnum++;
2347     if (!hv_store(hclass, name, len, INT2PTR(SV*, cxt->classnum), 0))
2348         CROAK(("Unable to record new classname"));
2349 
2350     *classnum = cxt->classnum;
2351     return FALSE;
2352 }
2353 
2354 /***
2355  *** Specific store routines.
2356  ***/
2357 
2358 /*
2359  * store_ref
2360  *
2361  * Store a reference.
2362  * Layout is SX_REF <object> or SX_OVERLOAD <object>.
2363  */
store_ref(pTHX_ stcxt_t * cxt,SV * sv)2364 static int store_ref(pTHX_ stcxt_t *cxt, SV *sv)
2365 {
2366     int retval;
2367     int is_weak = 0;
2368     TRACEME(("store_ref (0x%" UVxf ")", PTR2UV(sv)));
2369 
2370     /*
2371      * Follow reference, and check if target is overloaded.
2372      */
2373 
2374 #ifdef SvWEAKREF
2375     if (SvWEAKREF(sv))
2376         is_weak = 1;
2377     TRACEME(("ref (0x%" UVxf ") is%s weak", PTR2UV(sv),
2378              is_weak ? "" : "n't"));
2379 #endif
2380     sv = SvRV(sv);
2381 
2382     if (SvOBJECT(sv)) {
2383         HV *stash = (HV *) SvSTASH(sv);
2384         if (stash && Gv_AMG(stash)) {
2385             TRACEME(("ref (0x%" UVxf ") is overloaded", PTR2UV(sv)));
2386             PUTMARK(is_weak ? SX_WEAKOVERLOAD : SX_OVERLOAD);
2387         } else
2388             PUTMARK(is_weak ? SX_WEAKREF : SX_REF);
2389     } else
2390         PUTMARK(is_weak ? SX_WEAKREF : SX_REF);
2391 
2392     cxt->recur_sv = sv;
2393 
2394     TRACEME((">ref recur_depth %" IVdf ", recur_sv (0x%" UVxf ") max %" IVdf, cxt->recur_depth,
2395              PTR2UV(cxt->recur_sv), cxt->max_recur_depth));
2396     if (RECURSION_TOO_DEEP()) {
2397 #if PERL_VERSION < 15
2398         cleanup_recursive_data(aTHX_ (SV*)sv);
2399 #endif
2400         CROAK((MAX_DEPTH_ERROR));
2401     }
2402 
2403     retval = store(aTHX_ cxt, sv);
2404     if (cxt->max_recur_depth != -1 && cxt->recur_depth > 0) {
2405         TRACEME(("<ref recur_depth --%" IVdf, cxt->recur_depth));
2406         --cxt->recur_depth;
2407     }
2408     return retval;
2409 }
2410 
2411 /*
2412  * store_scalar
2413  *
2414  * Store a scalar.
2415  *
2416  * Layout is SX_LSCALAR <length> <data>, SX_SCALAR <length> <data> or SX_UNDEF.
2417  * SX_LUTF8STR and SX_UTF8STR are used for UTF-8 strings.
2418  * The <data> section is omitted if <length> is 0.
2419  *
2420  * For vstrings, the vstring portion is stored first with
2421  * SX_LVSTRING <length> <data> or SX_VSTRING <length> <data>, followed by
2422  * SX_(L)SCALAR or SX_(L)UTF8STR with the actual PV.
2423  *
2424  * If integer or double, the layout is SX_INTEGER <data> or SX_DOUBLE <data>.
2425  * Small integers (within [-127, +127]) are stored as SX_BYTE <byte>.
2426  *
2427  * For huge strings use SX_LOBJECT SX_type SX_U64 <type> <data>
2428  */
store_scalar(pTHX_ stcxt_t * cxt,SV * sv)2429 static int store_scalar(pTHX_ stcxt_t *cxt, SV *sv)
2430 {
2431     IV iv;
2432     char *pv;
2433     STRLEN len;
2434     U32 flags = SvFLAGS(sv);	/* "cc -O" may put it in register */
2435 
2436     TRACEME(("store_scalar (0x%" UVxf ")", PTR2UV(sv)));
2437 
2438     /*
2439      * For efficiency, break the SV encapsulation by peaking at the flags
2440      * directly without using the Perl macros to avoid dereferencing
2441      * sv->sv_flags each time we wish to check the flags.
2442      */
2443 
2444     if (!(flags & SVf_OK)) {			/* !SvOK(sv) */
2445         if (sv == &PL_sv_undef) {
2446             TRACEME(("immortal undef"));
2447             PUTMARK(SX_SV_UNDEF);
2448         } else {
2449             TRACEME(("undef at 0x%" UVxf, PTR2UV(sv)));
2450             PUTMARK(SX_UNDEF);
2451         }
2452         return 0;
2453     }
2454 
2455     /*
2456      * Always store the string representation of a scalar if it exists.
2457      * Gisle Aas provided me with this test case, better than a long speach:
2458      *
2459      *  perl -MDevel::Peek -le '$a="abc"; $a+0; Dump($a)'
2460      *  SV = PVNV(0x80c8520)
2461      *       REFCNT = 1
2462      *       FLAGS = (NOK,POK,pNOK,pPOK)
2463      *       IV = 0
2464      *       NV = 0
2465      *       PV = 0x80c83d0 "abc"\0
2466      *       CUR = 3
2467      *       LEN = 4
2468      *
2469      * Write SX_SCALAR, length, followed by the actual data.
2470      *
2471      * Otherwise, write an SX_BYTE, SX_INTEGER or an SX_DOUBLE as
2472      * appropriate, followed by the actual (binary) data. A double
2473      * is written as a string if network order, for portability.
2474      *
2475      * NOTE: instead of using SvNOK(sv), we test for SvNOKp(sv).
2476      * The reason is that when the scalar value is tainted, the SvNOK(sv)
2477      * value is false.
2478      *
2479      * The test for a read-only scalar with both POK and NOK set is meant
2480      * to quickly detect &PL_sv_yes and &PL_sv_no without having to pay the
2481      * address comparison for each scalar we store.
2482      */
2483 
2484 #define SV_MAYBE_IMMORTAL (SVf_READONLY|SVf_POK|SVf_NOK)
2485 
2486     if ((flags & SV_MAYBE_IMMORTAL) == SV_MAYBE_IMMORTAL) {
2487         if (sv == &PL_sv_yes) {
2488             TRACEME(("immortal yes"));
2489             PUTMARK(SX_SV_YES);
2490         } else if (sv == &PL_sv_no) {
2491             TRACEME(("immortal no"));
2492             PUTMARK(SX_SV_NO);
2493         } else {
2494             pv = SvPV(sv, len);		/* We know it's SvPOK */
2495             goto string;			/* Share code below */
2496         }
2497     } else if (flags & SVf_POK) {
2498         /* public string - go direct to string read.  */
2499         goto string_readlen;
2500     } else if (
2501 #if (PATCHLEVEL <= 6)
2502                /* For 5.6 and earlier NV flag trumps IV flag, so only use integer
2503                   direct if NV flag is off.  */
2504                (flags & (SVf_NOK | SVf_IOK)) == SVf_IOK
2505 #else
2506                /* 5.7 rules are that if IV public flag is set, IV value is as
2507                   good, if not better, than NV value.  */
2508                flags & SVf_IOK
2509 #endif
2510                ) {
2511         iv = SvIV(sv);
2512         /*
2513          * Will come here from below with iv set if double is an integer.
2514          */
2515     integer:
2516 
2517         /* Sorry. This isn't in 5.005_56 (IIRC) or earlier.  */
2518 #ifdef SVf_IVisUV
2519         /* Need to do this out here, else 0xFFFFFFFF becomes iv of -1
2520          * (for example) and that ends up in the optimised small integer
2521          * case.
2522          */
2523         if ((flags & SVf_IVisUV) && SvUV(sv) > IV_MAX) {
2524             TRACEME(("large unsigned integer as string, value = %" UVuf,
2525                      SvUV(sv)));
2526             goto string_readlen;
2527         }
2528 #endif
2529         /*
2530          * Optimize small integers into a single byte, otherwise store as
2531          * a real integer (converted into network order if they asked).
2532          */
2533 
2534         if (iv >= -128 && iv <= 127) {
2535             unsigned char siv = (unsigned char) (iv + 128); /* [0,255] */
2536             PUTMARK(SX_BYTE);
2537             PUTMARK(siv);
2538             TRACEME(("small integer stored as %d", (int)siv));
2539         } else if (cxt->netorder) {
2540 #ifndef HAS_HTONL
2541             TRACEME(("no htonl, fall back to string for integer"));
2542             goto string_readlen;
2543 #else
2544             I32 niv;
2545 
2546 
2547 #if IVSIZE > 4
2548             if (
2549 #ifdef SVf_IVisUV
2550                 /* Sorry. This isn't in 5.005_56 (IIRC) or earlier.  */
2551                 ((flags & SVf_IVisUV) && SvUV(sv) > (UV)0x7FFFFFFF) ||
2552 #endif
2553                 (iv > (IV)0x7FFFFFFF) || (iv < -(IV)0x80000000)) {
2554                 /* Bigger than 32 bits.  */
2555                 TRACEME(("large network order integer as string, value = %" IVdf, iv));
2556                 goto string_readlen;
2557             }
2558 #endif
2559 
2560             niv = (I32) htonl((I32) iv);
2561             TRACEME(("using network order"));
2562             PUTMARK(SX_NETINT);
2563             WRITE_I32(niv);
2564 #endif
2565         } else {
2566             PUTMARK(SX_INTEGER);
2567             WRITE(&iv, sizeof(iv));
2568         }
2569 
2570         TRACEME(("ok (integer 0x%" UVxf ", value = %" IVdf ")", PTR2UV(sv), iv));
2571     } else if (flags & SVf_NOK) {
2572         NV_bytes nv;
2573 #ifdef NV_CLEAR
2574         /* if we can't tell if there's padding, clear the whole NV and hope the
2575            compiler leaves the padding alone
2576         */
2577         Zero(&nv, 1, NV_bytes);
2578 #endif
2579 #if (PATCHLEVEL <= 6)
2580         nv.nv = SvNV(sv);
2581         /*
2582          * Watch for number being an integer in disguise.
2583          */
2584         if (nv.nv == (NV) (iv = I_V(nv.nv))) {
2585             TRACEME(("double %" NVff " is actually integer %" IVdf, nv, iv));
2586             goto integer;		/* Share code above */
2587         }
2588 #else
2589 
2590         SvIV_please(sv);
2591         if (SvIOK_notUV(sv)) {
2592             iv = SvIV(sv);
2593             goto integer;		/* Share code above */
2594         }
2595         nv.nv = SvNV(sv);
2596 #endif
2597 
2598         if (cxt->netorder) {
2599             TRACEME(("double %" NVff " stored as string", nv.nv));
2600             goto string_readlen;		/* Share code below */
2601         }
2602 #if NV_PADDING
2603         Zero(nv.bytes + NVSIZE - NV_PADDING, NV_PADDING, char);
2604 #endif
2605 
2606         PUTMARK(SX_DOUBLE);
2607         WRITE(&nv, sizeof(nv));
2608 
2609         TRACEME(("ok (double 0x%" UVxf ", value = %" NVff ")", PTR2UV(sv), nv.nv));
2610 
2611     } else if (flags & (SVp_POK | SVp_NOK | SVp_IOK)) {
2612 #ifdef SvVOK
2613         MAGIC *mg;
2614 #endif
2615         UV wlen; /* For 64-bit machines */
2616 
2617     string_readlen:
2618         pv = SvPV(sv, len);
2619 
2620         /*
2621          * Will come here from above  if it was readonly, POK and NOK but
2622          * neither &PL_sv_yes nor &PL_sv_no.
2623          */
2624     string:
2625 
2626 #ifdef SvVOK
2627         if (SvMAGICAL(sv) && (mg = mg_find(sv, 'V'))) {
2628             /* The macro passes this by address, not value, and a lot of
2629                called code assumes that it's 32 bits without checking.  */
2630             const SSize_t len = mg->mg_len;
2631             /* we no longer accept vstrings over I32_SIZE-1, so don't emit
2632                them, also, older Storables handle them badly.
2633             */
2634             if (len >= I32_MAX) {
2635                 CROAK(("vstring too large to freeze"));
2636             }
2637             STORE_PV_LEN((const char *)mg->mg_ptr,
2638                          len, SX_VSTRING, SX_LVSTRING);
2639         }
2640 #endif
2641 
2642         wlen = (Size_t)len;
2643         if (SvUTF8 (sv))
2644             STORE_UTF8STR(pv, wlen);
2645         else
2646             STORE_SCALAR(pv, wlen);
2647         TRACEME(("ok (scalar 0x%" UVxf " '%s', length = %" UVuf ")",
2648                  PTR2UV(sv), len >= 2048 ? "<string too long>" : SvPVX(sv),
2649                  (UV)len));
2650     } else {
2651         CROAK(("Can't determine type of %s(0x%" UVxf ")",
2652                sv_reftype(sv, FALSE),
2653                PTR2UV(sv)));
2654     }
2655     return 0;		/* Ok, no recursion on scalars */
2656 }
2657 
2658 /*
2659  * store_array
2660  *
2661  * Store an array.
2662  *
2663  * Layout is SX_ARRAY <size> followed by each item, in increasing index order.
2664  * Each item is stored as <object>.
2665  */
store_array(pTHX_ stcxt_t * cxt,AV * av)2666 static int store_array(pTHX_ stcxt_t *cxt, AV *av)
2667 {
2668     SV **sav;
2669     UV len = av_len(av) + 1;
2670     UV i;
2671     int ret;
2672     SV *const recur_sv = cxt->recur_sv;
2673 
2674     TRACEME(("store_array (0x%" UVxf ")", PTR2UV(av)));
2675 
2676 #ifdef HAS_U64
2677     if (len > 0x7fffffffu) {
2678         /*
2679          * Large array by emitting SX_LOBJECT 1 U64 data
2680          */
2681         PUTMARK(SX_LOBJECT);
2682         PUTMARK(SX_ARRAY);
2683         W64LEN(len);
2684         TRACEME(("lobject size = %lu", (unsigned long)len));
2685     } else
2686 #endif
2687     {
2688         /*
2689          * Normal array by emitting SX_ARRAY, followed by the array length.
2690          */
2691         I32 l = (I32)len;
2692         PUTMARK(SX_ARRAY);
2693         WLEN(l);
2694         TRACEME(("size = %d", (int)l));
2695     }
2696 
2697     TRACEME((">array recur_depth %" IVdf ", recur_sv (0x%" UVxf ") max %" IVdf, cxt->recur_depth,
2698              PTR2UV(cxt->recur_sv), cxt->max_recur_depth));
2699     if (recur_sv != (SV*)av) {
2700         if (RECURSION_TOO_DEEP()) {
2701             /* with <= 5.14 it recurses in the cleanup also, needing 2x stack size */
2702 #if PERL_VERSION < 15
2703             cleanup_recursive_data(aTHX_ (SV*)av);
2704 #endif
2705             CROAK((MAX_DEPTH_ERROR));
2706         }
2707     }
2708 
2709     /*
2710      * Now store each item recursively.
2711      */
2712 
2713     for (i = 0; i < len; i++) {
2714         sav = av_fetch(av, i, 0);
2715         if (!sav) {
2716             TRACEME(("(#%d) nonexistent item", (int)i));
2717             STORE_SV_UNDEF();
2718             continue;
2719         }
2720 #if PATCHLEVEL >= 19
2721         /* In 5.19.3 and up, &PL_sv_undef can actually be stored in
2722          * an array; it no longer represents nonexistent elements.
2723          * Historically, we have used SX_SV_UNDEF in arrays for
2724          * nonexistent elements, so we use SX_SVUNDEF_ELEM for
2725          * &PL_sv_undef itself. */
2726         if (*sav == &PL_sv_undef) {
2727             TRACEME(("(#%d) undef item", (int)i));
2728             cxt->tagnum++;
2729             PUTMARK(SX_SVUNDEF_ELEM);
2730             continue;
2731         }
2732 #endif
2733         TRACEME(("(#%d) item", (int)i));
2734         if ((ret = store(aTHX_ cxt, *sav)))	/* Extra () for -Wall */
2735             return ret;
2736     }
2737 
2738     if (recur_sv != (SV*)av) {
2739         assert(cxt->max_recur_depth == -1 || cxt->recur_depth > 0);
2740         if (cxt->max_recur_depth != -1 && cxt->recur_depth > 0) {
2741             TRACEME(("<array recur_depth --%" IVdf, cxt->recur_depth));
2742             --cxt->recur_depth;
2743         }
2744     }
2745     TRACEME(("ok (array)"));
2746 
2747     return 0;
2748 }
2749 
2750 
2751 #if (PATCHLEVEL <= 6)
2752 
2753 /*
2754  * sortcmp
2755  *
2756  * Sort two SVs
2757  * Borrowed from perl source file pp_ctl.c, where it is used by pp_sort.
2758  */
2759 static int
sortcmp(const void * a,const void * b)2760 sortcmp(const void *a, const void *b)
2761 {
2762 #if defined(USE_ITHREADS)
2763     dTHX;
2764 #endif /* USE_ITHREADS */
2765     return sv_cmp(*(SV * const *) a, *(SV * const *) b);
2766 }
2767 
2768 #endif /* PATCHLEVEL <= 6 */
2769 
2770 /*
2771  * store_hash
2772  *
2773  * Store a hash table.
2774  *
2775  * For a "normal" hash (not restricted, no utf8 keys):
2776  *
2777  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
2778  * Values are stored as <object>.
2779  * Keys are stored as <length> <data>, the <data> section being omitted
2780  * if length is 0.
2781  *
2782  * For a "fancy" hash (restricted or utf8 keys):
2783  *
2784  * Layout is SX_FLAG_HASH <size> <hash flags> followed by each key/value pair,
2785  * in random order.
2786  * Values are stored as <object>.
2787  * Keys are stored as <flags> <length> <data>, the <data> section being omitted
2788  * if length is 0.
2789  * Currently the only hash flag is "restricted"
2790  * Key flags are as for hv.h
2791  */
store_hash(pTHX_ stcxt_t * cxt,HV * hv)2792 static int store_hash(pTHX_ stcxt_t *cxt, HV *hv)
2793 {
2794     dVAR;
2795     UV len = (UV)HvTOTALKEYS(hv);
2796     Size_t i;
2797     int ret = 0;
2798     I32 riter;
2799     HE *eiter;
2800     int flagged_hash = ((SvREADONLY(hv)
2801 #ifdef HAS_HASH_KEY_FLAGS
2802                          || HvHASKFLAGS(hv)
2803 #endif
2804                          ) ? 1 : 0);
2805     unsigned char hash_flags = (SvREADONLY(hv) ? SHV_RESTRICTED : 0);
2806     SV * const recur_sv = cxt->recur_sv;
2807 
2808     /*
2809      * Signal hash by emitting SX_HASH, followed by the table length.
2810      * Max number of keys per perl version:
2811      *    IV            - 5.12
2812      *    STRLEN  5.14  - 5.24   (size_t: U32/U64)
2813      *    SSize_t 5.22c - 5.24c  (I32/I64)
2814      *    U32     5.25c -
2815      */
2816 
2817     if (len > 0x7fffffffu) { /* keys > I32_MAX */
2818         /*
2819          * Large hash: SX_LOBJECT type hashflags? U64 data
2820          *
2821          * Stupid limitation:
2822          * Note that perl5 can store more than 2G keys, but only iterate
2823          * over 2G max. (cperl can)
2824          * We need to manually iterate over it then, unsorted.
2825          * But until perl itself cannot do that, skip that.
2826          */
2827         TRACEME(("lobject size = %lu", (unsigned long)len));
2828 #ifdef HAS_U64
2829         PUTMARK(SX_LOBJECT);
2830         if (flagged_hash) {
2831             PUTMARK(SX_FLAG_HASH);
2832             PUTMARK(hash_flags);
2833         } else {
2834             PUTMARK(SX_HASH);
2835         }
2836         W64LEN(len);
2837         return store_lhash(aTHX_ cxt, hv, hash_flags);
2838 #else
2839         /* <5.12 you could store larger hashes, but cannot iterate over them.
2840            So we reject them, it's a bug. */
2841         CROAK(("Cannot store large objects on a 32bit system"));
2842 #endif
2843     } else {
2844         I32 l = (I32)len;
2845         if (flagged_hash) {
2846             TRACEME(("store_hash (0x%" UVxf ") (flags %x)", PTR2UV(hv),
2847                      (unsigned int)hash_flags));
2848             PUTMARK(SX_FLAG_HASH);
2849             PUTMARK(hash_flags);
2850         } else {
2851             TRACEME(("store_hash (0x%" UVxf ")", PTR2UV(hv)));
2852             PUTMARK(SX_HASH);
2853         }
2854         WLEN(l);
2855         TRACEME(("size = %d, used = %d", (int)l, (int)HvUSEDKEYS(hv)));
2856     }
2857 
2858     TRACEME((">hash recur_depth %" IVdf ", recur_sv (0x%" UVxf ") max %" IVdf, cxt->recur_depth,
2859              PTR2UV(cxt->recur_sv), cxt->max_recur_depth_hash));
2860     if (recur_sv != (SV*)hv && cxt->max_recur_depth_hash != -1) {
2861         ++cxt->recur_depth;
2862     }
2863     if (RECURSION_TOO_DEEP_HASH()) {
2864 #if PERL_VERSION < 15
2865         cleanup_recursive_data(aTHX_ (SV*)hv);
2866 #endif
2867         CROAK((MAX_DEPTH_ERROR));
2868     }
2869 
2870     /*
2871      * Save possible iteration state via each() on that table.
2872      *
2873      * Note that perl as of 5.24 *can* store more than 2G keys, but *not*
2874      * iterate over it.
2875      * Lengths of hash keys are also limited to I32, which is good.
2876      */
2877 
2878     riter = HvRITER_get(hv);
2879     eiter = HvEITER_get(hv);
2880     hv_iterinit(hv);
2881 
2882     /*
2883      * Now store each item recursively.
2884      *
2885      * If canonical is defined to some true value then store each
2886      * key/value pair in sorted order otherwise the order is random.
2887      * Canonical order is irrelevant when a deep clone operation is performed.
2888      *
2889      * Fetch the value from perl only once per store() operation, and only
2890      * when needed.
2891      */
2892 
2893     if (
2894         !(cxt->optype & ST_CLONE)
2895         && (cxt->canonical == 1
2896             || (cxt->canonical < 0
2897                 && (cxt->canonical =
2898                     (SvTRUE(get_sv("Storable::canonical", GV_ADD))
2899                      ? 1 : 0))))
2900 	) {
2901         /*
2902          * Storing in order, sorted by key.
2903          * Run through the hash, building up an array of keys in a
2904          * mortal array, sort the array and then run through the
2905          * array.
2906          */
2907         AV *av = newAV();
2908         av_extend (av, len);
2909 
2910         TRACEME(("using canonical order"));
2911 
2912         for (i = 0; i < len; i++) {
2913 #ifdef HAS_RESTRICTED_HASHES
2914             HE *he = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS);
2915 #else
2916             HE *he = hv_iternext(hv);
2917 #endif
2918             av_store(av, i, hv_iterkeysv(he));
2919         }
2920 
2921         STORE_HASH_SORT;
2922 
2923         for (i = 0; i < len; i++) {
2924 #ifdef HAS_RESTRICTED_HASHES
2925             int placeholders = (int)HvPLACEHOLDERS_get(hv);
2926 #endif
2927             unsigned char flags = 0;
2928             char *keyval;
2929             STRLEN keylen_tmp;
2930             I32 keylen;
2931             SV *key = av_shift(av);
2932             /* This will fail if key is a placeholder.
2933                Track how many placeholders we have, and error if we
2934                "see" too many.  */
2935             HE *he  = hv_fetch_ent(hv, key, 0, 0);
2936             SV *val;
2937 
2938             if (he) {
2939                 if (!(val =  HeVAL(he))) {
2940                     /* Internal error, not I/O error */
2941                     return 1;
2942                 }
2943             } else {
2944 #ifdef HAS_RESTRICTED_HASHES
2945                 /* Should be a placeholder.  */
2946                 if (placeholders-- < 0) {
2947                     /* This should not happen - number of
2948                        retrieves should be identical to
2949                        number of placeholders.  */
2950                     return 1;
2951                 }
2952                 /* Value is never needed, and PL_sv_undef is
2953                    more space efficient to store.  */
2954                 val = &PL_sv_undef;
2955                 ASSERT (flags == 0,
2956                         ("Flags not 0 but %d", (int)flags));
2957                 flags = SHV_K_PLACEHOLDER;
2958 #else
2959                 return 1;
2960 #endif
2961             }
2962 
2963             /*
2964              * Store value first.
2965              */
2966 
2967             TRACEME(("(#%d) value 0x%" UVxf, (int)i, PTR2UV(val)));
2968 
2969             if ((ret = store(aTHX_ cxt, val)))	/* Extra () for -Wall, grr... */
2970                 goto out;
2971 
2972             /*
2973              * Write key string.
2974              * Keys are written after values to make sure retrieval
2975              * can be optimal in terms of memory usage, where keys are
2976              * read into a fixed unique buffer called kbuf.
2977              * See retrieve_hash() for details.
2978              */
2979 
2980             /* Implementation of restricted hashes isn't nicely
2981                abstracted:  */
2982             if ((hash_flags & SHV_RESTRICTED)
2983                 && SvTRULYREADONLY(val)) {
2984                 flags |= SHV_K_LOCKED;
2985             }
2986 
2987             keyval = SvPV(key, keylen_tmp);
2988             keylen = keylen_tmp;
2989 #ifdef HAS_UTF8_HASHES
2990             /* If you build without optimisation on pre 5.6
2991                then nothing spots that SvUTF8(key) is always 0,
2992                so the block isn't optimised away, at which point
2993                the linker dislikes the reference to
2994                bytes_from_utf8.  */
2995             if (SvUTF8(key)) {
2996                 const char *keysave = keyval;
2997                 bool is_utf8 = TRUE;
2998 
2999                 /* Just casting the &klen to (STRLEN) won't work
3000                    well if STRLEN and I32 are of different widths.
3001                    --jhi */
3002                 keyval = (char*)bytes_from_utf8((U8*)keyval,
3003                                                 &keylen_tmp,
3004                                                 &is_utf8);
3005 
3006                 /* If we were able to downgrade here, then than
3007                    means that we have  a key which only had chars
3008                    0-255, but was utf8 encoded.  */
3009 
3010                 if (keyval != keysave) {
3011                     keylen = keylen_tmp;
3012                     flags |= SHV_K_WASUTF8;
3013                 } else {
3014                     /* keylen_tmp can't have changed, so no need
3015                        to assign back to keylen.  */
3016                     flags |= SHV_K_UTF8;
3017                 }
3018             }
3019 #endif
3020 
3021             if (flagged_hash) {
3022                 PUTMARK(flags);
3023                 TRACEME(("(#%d) key '%s' flags %x %u", (int)i, keyval, flags, *keyval));
3024             } else {
3025                 /* This is a workaround for a bug in 5.8.0
3026                    that causes the HEK_WASUTF8 flag to be
3027                    set on an HEK without the hash being
3028                    marked as having key flags. We just
3029                    cross our fingers and drop the flag.
3030                    AMS 20030901 */
3031                 assert (flags == 0 || flags == SHV_K_WASUTF8);
3032                 TRACEME(("(#%d) key '%s'", (int)i, keyval));
3033             }
3034             WLEN(keylen);
3035             if (keylen)
3036                 WRITE(keyval, keylen);
3037             if (flags & SHV_K_WASUTF8)
3038                 Safefree (keyval);
3039         }
3040 
3041         /*
3042          * Free up the temporary array
3043          */
3044 
3045         av_undef(av);
3046         sv_free((SV *) av);
3047 
3048     } else {
3049 
3050         /*
3051          * Storing in "random" order (in the order the keys are stored
3052          * within the hash).  This is the default and will be faster!
3053          */
3054 
3055         for (i = 0; i < len; i++) {
3056 #ifdef HV_ITERNEXT_WANTPLACEHOLDERS
3057             HE *he = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS);
3058 #else
3059             HE *he = hv_iternext(hv);
3060 #endif
3061             SV *val = (he ? hv_iterval(hv, he) : 0);
3062 
3063             if (val == 0)
3064                 return 1;		/* Internal error, not I/O error */
3065 
3066             if ((ret = store_hentry(aTHX_ cxt, hv, i, he, hash_flags)))
3067                 goto out;
3068 #if 0
3069             /* Implementation of restricted hashes isn't nicely
3070                abstracted:  */
3071             flags = (((hash_flags & SHV_RESTRICTED)
3072                       && SvTRULYREADONLY(val))
3073                      ? SHV_K_LOCKED : 0);
3074 
3075             if (val == &PL_sv_placeholder) {
3076                 flags |= SHV_K_PLACEHOLDER;
3077                 val = &PL_sv_undef;
3078             }
3079 
3080             /*
3081              * Store value first.
3082              */
3083 
3084             TRACEME(("(#%d) value 0x%" UVxf, (int)i, PTR2UV(val)));
3085 
3086             if ((ret = store(aTHX_ cxt, val)))	/* Extra () for -Wall */
3087                 goto out;
3088 
3089 
3090             hek = HeKEY_hek(he);
3091             len = HEK_LEN(hek);
3092             if (len == HEf_SVKEY) {
3093                 /* This is somewhat sick, but the internal APIs are
3094                  * such that XS code could put one of these in
3095                  * a regular hash.
3096                  * Maybe we should be capable of storing one if
3097                  * found.
3098                  */
3099                 key_sv = HeKEY_sv(he);
3100                 flags |= SHV_K_ISSV;
3101             } else {
3102                 /* Regular string key. */
3103 #ifdef HAS_HASH_KEY_FLAGS
3104                 if (HEK_UTF8(hek))
3105                     flags |= SHV_K_UTF8;
3106                 if (HEK_WASUTF8(hek))
3107                     flags |= SHV_K_WASUTF8;
3108 #endif
3109                 key = HEK_KEY(hek);
3110             }
3111             /*
3112              * Write key string.
3113              * Keys are written after values to make sure retrieval
3114              * can be optimal in terms of memory usage, where keys are
3115              * read into a fixed unique buffer called kbuf.
3116              * See retrieve_hash() for details.
3117              */
3118 
3119             if (flagged_hash) {
3120                 PUTMARK(flags);
3121                 TRACEME(("(#%d) key '%s' flags %x", (int)i, key, flags));
3122             } else {
3123                 /* This is a workaround for a bug in 5.8.0
3124                    that causes the HEK_WASUTF8 flag to be
3125                    set on an HEK without the hash being
3126                    marked as having key flags. We just
3127                    cross our fingers and drop the flag.
3128                    AMS 20030901 */
3129                 assert (flags == 0 || flags == SHV_K_WASUTF8);
3130                 TRACEME(("(#%d) key '%s'", (int)i, key));
3131             }
3132             if (flags & SHV_K_ISSV) {
3133                 int ret;
3134                 if ((ret = store(aTHX_ cxt, key_sv)))
3135                     goto out;
3136             } else {
3137                 WLEN(len);
3138                 if (len)
3139                     WRITE(key, len);
3140             }
3141 #endif
3142         }
3143     }
3144 
3145     TRACEME(("ok (hash 0x%" UVxf ")", PTR2UV(hv)));
3146 
3147  out:
3148     assert(cxt->max_recur_depth_hash != -1 && cxt->recur_depth > 0);
3149     TRACEME(("<hash recur_depth --%" IVdf , cxt->recur_depth));
3150     if (cxt->max_recur_depth_hash != -1 && recur_sv != (SV*)hv && cxt->recur_depth > 0) {
3151         --cxt->recur_depth;
3152     }
3153     HvRITER_set(hv, riter);		/* Restore hash iterator state */
3154     HvEITER_set(hv, eiter);
3155 
3156     return ret;
3157 }
3158 
store_hentry(pTHX_ stcxt_t * cxt,HV * hv,UV i,HE * he,unsigned char hash_flags)3159 static int store_hentry(pTHX_
3160 	stcxt_t *cxt, HV* hv, UV i, HE *he, unsigned char hash_flags)
3161 {
3162     int ret = 0;
3163     SV* val = hv_iterval(hv, he);
3164     int flagged_hash = ((SvREADONLY(hv)
3165 #ifdef HAS_HASH_KEY_FLAGS
3166                          || HvHASKFLAGS(hv)
3167 #endif
3168                          ) ? 1 : 0);
3169     unsigned char flags = (((hash_flags & SHV_RESTRICTED)
3170                             && SvTRULYREADONLY(val))
3171                            ? SHV_K_LOCKED : 0);
3172 #ifndef DEBUGME
3173     PERL_UNUSED_ARG(i);
3174 #endif
3175     if (val == &PL_sv_placeholder) {
3176         flags |= SHV_K_PLACEHOLDER;
3177         val = &PL_sv_undef;
3178     }
3179 
3180     /*
3181      * Store value first.
3182      */
3183 
3184     TRACEME(("(#%d) value 0x%" UVxf, (int)i, PTR2UV(val)));
3185 
3186     {
3187         HEK* hek = HeKEY_hek(he);
3188         I32  len = HEK_LEN(hek);
3189         SV *key_sv = NULL;
3190         char *key = 0;
3191 
3192         if ((ret = store(aTHX_ cxt, val)))
3193             return ret;
3194         if (len == HEf_SVKEY) {
3195             key_sv = HeKEY_sv(he);
3196             flags |= SHV_K_ISSV;
3197         } else {
3198             /* Regular string key. */
3199 #ifdef HAS_HASH_KEY_FLAGS
3200             if (HEK_UTF8(hek))
3201                 flags |= SHV_K_UTF8;
3202             if (HEK_WASUTF8(hek))
3203                 flags |= SHV_K_WASUTF8;
3204 #endif
3205             key = HEK_KEY(hek);
3206         }
3207         /*
3208          * Write key string.
3209          * Keys are written after values to make sure retrieval
3210          * can be optimal in terms of memory usage, where keys are
3211          * read into a fixed unique buffer called kbuf.
3212          * See retrieve_hash() for details.
3213          */
3214 
3215         if (flagged_hash) {
3216             PUTMARK(flags);
3217             TRACEME(("(#%d) key '%s' flags %x", (int)i, key, flags));
3218         } else {
3219             /* This is a workaround for a bug in 5.8.0
3220                that causes the HEK_WASUTF8 flag to be
3221                set on an HEK without the hash being
3222                marked as having key flags. We just
3223                cross our fingers and drop the flag.
3224                AMS 20030901 */
3225             assert (flags == 0 || flags == SHV_K_WASUTF8);
3226             TRACEME(("(#%d) key '%s'", (int)i, key));
3227         }
3228         if (flags & SHV_K_ISSV) {
3229             if ((ret = store(aTHX_ cxt, key_sv)))
3230                 return ret;
3231         } else {
3232             WLEN(len);
3233             if (len)
3234                 WRITE(key, len);
3235         }
3236     }
3237     return ret;
3238 }
3239 
3240 
3241 #ifdef HAS_U64
3242 /*
3243  * store_lhash
3244  *
3245  * Store a overlong hash table, with >2G keys, which we cannot iterate
3246  * over with perl5. xhv_eiter is only I32 there. (only cperl can)
3247  * and we also do not want to sort it.
3248  * So we walk the buckets and chains manually.
3249  *
3250  * type, len and flags are already written.
3251  */
3252 
store_lhash(pTHX_ stcxt_t * cxt,HV * hv,unsigned char hash_flags)3253 static int store_lhash(pTHX_ stcxt_t *cxt, HV *hv, unsigned char hash_flags)
3254 {
3255     dVAR;
3256     int ret = 0;
3257     Size_t i;
3258     UV ix = 0;
3259     HE** array;
3260 #ifdef DEBUGME
3261     UV len = (UV)HvTOTALKEYS(hv);
3262 #endif
3263     SV * const recur_sv = cxt->recur_sv;
3264     if (hash_flags) {
3265         TRACEME(("store_lhash (0x%" UVxf ") (flags %x)", PTR2UV(hv),
3266                  (int) hash_flags));
3267     } else {
3268         TRACEME(("store_lhash (0x%" UVxf ")", PTR2UV(hv)));
3269     }
3270     TRACEME(("size = %" UVuf ", used = %" UVuf, len, (UV)HvUSEDKEYS(hv)));
3271 
3272     TRACEME(("recur_depth %" IVdf ", recur_sv (0x%" UVxf ")", cxt->recur_depth,
3273              PTR2UV(cxt->recur_sv)));
3274     if (recur_sv != (SV*)hv && cxt->max_recur_depth_hash != -1) {
3275         ++cxt->recur_depth;
3276     }
3277     if (RECURSION_TOO_DEEP_HASH()) {
3278 #if PERL_VERSION < 15
3279         cleanup_recursive_data(aTHX_ (SV*)hv);
3280 #endif
3281         CROAK((MAX_DEPTH_ERROR));
3282     }
3283 
3284     array = HvARRAY(hv);
3285     for (i = 0; i <= (Size_t)HvMAX(hv); i++) {
3286         HE* entry = array[i];
3287         if (!entry) continue;
3288         if ((ret = store_hentry(aTHX_ cxt, hv, ix++, entry, hash_flags)))
3289             return ret;
3290         while ((entry = HeNEXT(entry))) {
3291             if ((ret = store_hentry(aTHX_ cxt, hv, ix++, entry, hash_flags)))
3292                 return ret;
3293         }
3294     }
3295     if (recur_sv == (SV*)hv && cxt->max_recur_depth_hash != -1 && cxt->recur_depth > 0) {
3296         TRACEME(("recur_depth --%" IVdf, cxt->recur_depth));
3297         --cxt->recur_depth;
3298     }
3299     assert(ix == len);
3300     return ret;
3301 }
3302 #endif
3303 
3304 /*
3305  * store_code
3306  *
3307  * Store a code reference.
3308  *
3309  * Layout is SX_CODE <length> followed by a scalar containing the perl
3310  * source code of the code reference.
3311  */
store_code(pTHX_ stcxt_t * cxt,CV * cv)3312 static int store_code(pTHX_ stcxt_t *cxt, CV *cv)
3313 {
3314 #if PERL_VERSION < 6
3315     /*
3316      * retrieve_code does not work with perl 5.005 or less
3317      */
3318     return store_other(aTHX_ cxt, (SV*)cv);
3319 #else
3320     dSP;
3321     STRLEN len;
3322     STRLEN count, reallen;
3323     SV *text, *bdeparse;
3324 
3325     TRACEME(("store_code (0x%" UVxf ")", PTR2UV(cv)));
3326 
3327     if (
3328         cxt->deparse == 0 ||
3329         (cxt->deparse < 0 &&
3330          !(cxt->deparse =
3331            SvTRUE(get_sv("Storable::Deparse", GV_ADD)) ? 1 : 0))
3332 	) {
3333         return store_other(aTHX_ cxt, (SV*)cv);
3334     }
3335 
3336     /*
3337      * Require B::Deparse. At least B::Deparse 0.61 is needed for
3338      * blessed code references.
3339      */
3340     /* Ownership of both SVs is passed to load_module, which frees them. */
3341     load_module(PERL_LOADMOD_NOIMPORT, newSVpvs("B::Deparse"), newSVnv(0.61));
3342     SPAGAIN;
3343 
3344     ENTER;
3345     SAVETMPS;
3346 
3347     /*
3348      * create the B::Deparse object
3349      */
3350 
3351     PUSHMARK(sp);
3352     XPUSHs(newSVpvs_flags("B::Deparse", SVs_TEMP));
3353     PUTBACK;
3354     count = call_method("new", G_SCALAR);
3355     SPAGAIN;
3356     if (count != 1)
3357         CROAK(("Unexpected return value from B::Deparse::new\n"));
3358     bdeparse = POPs;
3359 
3360     /*
3361      * call the coderef2text method
3362      */
3363 
3364     PUSHMARK(sp);
3365     XPUSHs(bdeparse); /* XXX is this already mortal? */
3366     XPUSHs(sv_2mortal(newRV_inc((SV*)cv)));
3367     PUTBACK;
3368     count = call_method("coderef2text", G_SCALAR);
3369     SPAGAIN;
3370     if (count != 1)
3371         CROAK(("Unexpected return value from B::Deparse::coderef2text\n"));
3372 
3373     text = POPs;
3374     len = SvCUR(text);
3375     reallen = strlen(SvPV_nolen(text));
3376 
3377     /*
3378      * Empty code references or XS functions are deparsed as
3379      * "(prototype) ;" or ";".
3380      */
3381 
3382     if (len == 0 || *(SvPV_nolen(text)+reallen-1) == ';') {
3383         CROAK(("The result of B::Deparse::coderef2text was empty - maybe you're trying to serialize an XS function?\n"));
3384     }
3385 
3386     /*
3387      * Signal code by emitting SX_CODE.
3388      */
3389 
3390     PUTMARK(SX_CODE);
3391     cxt->tagnum++;   /* necessary, as SX_CODE is a SEEN() candidate */
3392     TRACEME(("size = %d", (int)len));
3393     TRACEME(("code = %s", SvPV_nolen(text)));
3394 
3395     /*
3396      * Now store the source code.
3397      */
3398 
3399     if(SvUTF8 (text))
3400         STORE_UTF8STR(SvPV_nolen(text), len);
3401     else
3402         STORE_SCALAR(SvPV_nolen(text), len);
3403 
3404     FREETMPS;
3405     LEAVE;
3406 
3407     TRACEME(("ok (code)"));
3408 
3409     return 0;
3410 #endif
3411 }
3412 
3413 #if PERL_VERSION < 8
3414 #   define PERL_MAGIC_qr                  'r' /* precompiled qr// regex */
3415 #   define BFD_Svs_SMG_OR_RMG SVs_RMG
3416 #elif ((PERL_VERSION==8) && (PERL_SUBVERSION >= 1) || (PERL_VERSION>8))
3417 #   define BFD_Svs_SMG_OR_RMG SVs_SMG
3418 #   define MY_PLACEHOLDER PL_sv_placeholder
3419 #else
3420 #   define BFD_Svs_SMG_OR_RMG SVs_RMG
3421 #   define MY_PLACEHOLDER PL_sv_undef
3422 #endif
3423 
get_regexp(pTHX_ stcxt_t * cxt,SV * sv,SV ** re,SV ** flags)3424 static int get_regexp(pTHX_ stcxt_t *cxt, SV* sv, SV **re, SV **flags) {
3425     dSP;
3426     SV* rv;
3427 #if PERL_VERSION >= 12
3428     CV *cv = get_cv("re::regexp_pattern", 0);
3429 #else
3430     CV *cv = get_cv("Storable::_regexp_pattern", 0);
3431 #endif
3432     I32 count;
3433 
3434     assert(cv);
3435 
3436     ENTER;
3437     SAVETMPS;
3438     rv = sv_2mortal((SV*)newRV_inc(sv));
3439     PUSHMARK(sp);
3440     XPUSHs(rv);
3441     PUTBACK;
3442     /* optimize to call the XS directly later */
3443     count = call_sv((SV*)cv, G_ARRAY);
3444     SPAGAIN;
3445     if (count < 2)
3446       CROAK(("re::regexp_pattern returned only %d results", (int)count));
3447     *flags = POPs;
3448     SvREFCNT_inc(*flags);
3449     *re = POPs;
3450     SvREFCNT_inc(*re);
3451 
3452     PUTBACK;
3453     FREETMPS;
3454     LEAVE;
3455 
3456     return 1;
3457 }
3458 
store_regexp(pTHX_ stcxt_t * cxt,SV * sv)3459 static int store_regexp(pTHX_ stcxt_t *cxt, SV *sv) {
3460     SV *re = NULL;
3461     SV *flags = NULL;
3462     const char *re_pv;
3463     const char *flags_pv;
3464     STRLEN re_len;
3465     STRLEN flags_len;
3466     U8 op_flags = 0;
3467 
3468     if (!get_regexp(aTHX_ cxt, sv, &re, &flags))
3469       return -1;
3470 
3471     re_pv = SvPV(re, re_len);
3472     flags_pv = SvPV(flags, flags_len);
3473 
3474     if (re_len > 0xFF) {
3475       op_flags |= SHR_U32_RE_LEN;
3476     }
3477 
3478     PUTMARK(SX_REGEXP);
3479     PUTMARK(op_flags);
3480     if (op_flags & SHR_U32_RE_LEN) {
3481       U32 re_len32 = re_len;
3482       WLEN(re_len32);
3483     }
3484     else
3485       PUTMARK(re_len);
3486     WRITE(re_pv, re_len);
3487     PUTMARK(flags_len);
3488     WRITE(flags_pv, flags_len);
3489 
3490     return 0;
3491 }
3492 
3493 /*
3494  * store_tied
3495  *
3496  * When storing a tied object (be it a tied scalar, array or hash), we lay out
3497  * a special mark, followed by the underlying tied object. For instance, when
3498  * dealing with a tied hash, we store SX_TIED_HASH <hash object>, where
3499  * <hash object> stands for the serialization of the tied hash.
3500  */
store_tied(pTHX_ stcxt_t * cxt,SV * sv)3501 static int store_tied(pTHX_ stcxt_t *cxt, SV *sv)
3502 {
3503     MAGIC *mg;
3504     SV *obj = NULL;
3505     int ret = 0;
3506     int svt = SvTYPE(sv);
3507     char mtype = 'P';
3508 
3509     TRACEME(("store_tied (0x%" UVxf ")", PTR2UV(sv)));
3510 
3511     /*
3512      * We have a small run-time penalty here because we chose to factorise
3513      * all tieds objects into the same routine, and not have a store_tied_hash,
3514      * a store_tied_array, etc...
3515      *
3516      * Don't use a switch() statement, as most compilers don't optimize that
3517      * well for 2/3 values. An if() else if() cascade is just fine. We put
3518      * tied hashes first, as they are the most likely beasts.
3519      */
3520 
3521     if (svt == SVt_PVHV) {
3522         TRACEME(("tied hash"));
3523         PUTMARK(SX_TIED_HASH);		/* Introduces tied hash */
3524     } else if (svt == SVt_PVAV) {
3525         TRACEME(("tied array"));
3526         PUTMARK(SX_TIED_ARRAY);		/* Introduces tied array */
3527     } else {
3528         TRACEME(("tied scalar"));
3529         PUTMARK(SX_TIED_SCALAR);	/* Introduces tied scalar */
3530         mtype = 'q';
3531     }
3532 
3533     if (!(mg = mg_find(sv, mtype)))
3534         CROAK(("No magic '%c' found while storing tied %s", mtype,
3535                (svt == SVt_PVHV) ? "hash" :
3536                (svt == SVt_PVAV) ? "array" : "scalar"));
3537 
3538     /*
3539      * The mg->mg_obj found by mg_find() above actually points to the
3540      * underlying tied Perl object implementation. For instance, if the
3541      * original SV was that of a tied array, then mg->mg_obj is an AV.
3542      *
3543      * Note that we store the Perl object as-is. We don't call its FETCH
3544      * method along the way. At retrieval time, we won't call its STORE
3545      * method either, but the tieing magic will be re-installed. In itself,
3546      * that ensures that the tieing semantics are preserved since further
3547      * accesses on the retrieved object will indeed call the magic methods...
3548      */
3549 
3550     /* [#17040] mg_obj is NULL for scalar self-ties. AMS 20030416 */
3551     obj = mg->mg_obj ? mg->mg_obj : newSV(0);
3552     if ((ret = store(aTHX_ cxt, obj)))
3553         return ret;
3554 
3555     TRACEME(("ok (tied)"));
3556 
3557     return 0;
3558 }
3559 
3560 /*
3561  * store_tied_item
3562  *
3563  * Stores a reference to an item within a tied structure:
3564  *
3565  *  . \$h{key}, stores both the (tied %h) object and 'key'.
3566  *  . \$a[idx], stores both the (tied @a) object and 'idx'.
3567  *
3568  * Layout is therefore either:
3569  *     SX_TIED_KEY <object> <key>
3570  *     SX_TIED_IDX <object> <index>
3571  */
store_tied_item(pTHX_ stcxt_t * cxt,SV * sv)3572 static int store_tied_item(pTHX_ stcxt_t *cxt, SV *sv)
3573 {
3574     MAGIC *mg;
3575     int ret;
3576 
3577     TRACEME(("store_tied_item (0x%" UVxf ")", PTR2UV(sv)));
3578 
3579     if (!(mg = mg_find(sv, 'p')))
3580         CROAK(("No magic 'p' found while storing reference to tied item"));
3581 
3582     /*
3583      * We discriminate between \$h{key} and \$a[idx] via mg_ptr.
3584      */
3585 
3586     if (mg->mg_ptr) {
3587         TRACEME(("store_tied_item: storing a ref to a tied hash item"));
3588         PUTMARK(SX_TIED_KEY);
3589         TRACEME(("store_tied_item: storing OBJ 0x%" UVxf, PTR2UV(mg->mg_obj)));
3590 
3591         if ((ret = store(aTHX_ cxt, mg->mg_obj)))	/* Extra () for -Wall, grr... */
3592             return ret;
3593 
3594         TRACEME(("store_tied_item: storing PTR 0x%" UVxf, PTR2UV(mg->mg_ptr)));
3595 
3596         if ((ret = store(aTHX_ cxt, (SV *) mg->mg_ptr))) /* Idem, for -Wall */
3597             return ret;
3598     } else {
3599         I32 idx = mg->mg_len;
3600 
3601         TRACEME(("store_tied_item: storing a ref to a tied array item "));
3602         PUTMARK(SX_TIED_IDX);
3603         TRACEME(("store_tied_item: storing OBJ 0x%" UVxf, PTR2UV(mg->mg_obj)));
3604 
3605         if ((ret = store(aTHX_ cxt, mg->mg_obj)))	/* Idem, for -Wall */
3606             return ret;
3607 
3608         TRACEME(("store_tied_item: storing IDX %d", (int)idx));
3609 
3610         WLEN(idx);
3611     }
3612 
3613     TRACEME(("ok (tied item)"));
3614 
3615     return 0;
3616 }
3617 
3618 /*
3619  * store_hook		-- dispatched manually, not via sv_store[]
3620  *
3621  * The blessed SV is serialized by a hook.
3622  *
3623  * Simple Layout is:
3624  *
3625  *     SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
3626  *
3627  * where <flags> indicates how long <len>, <len2> and <len3> are, whether
3628  * the trailing part [] is present, the type of object (scalar, array or hash).
3629  * There is also a bit which says how the classname is stored between:
3630  *
3631  *     <len> <classname>
3632  *     <index>
3633  *
3634  * and when the <index> form is used (classname already seen), the "large
3635  * classname" bit in <flags> indicates how large the <index> is.
3636  *
3637  * The serialized string returned by the hook is of length <len2> and comes
3638  * next.  It is an opaque string for us.
3639  *
3640  * Those <len3> object IDs which are listed last represent the extra references
3641  * not directly serialized by the hook, but which are linked to the object.
3642  *
3643  * When recursion is mandated to resolve object-IDs not yet seen, we have
3644  * instead, with <header> being flags with bits set to indicate the object type
3645  * and that recursion was indeed needed:
3646  *
3647  *     SX_HOOK <header> <object> <header> <object> <flags>
3648  *
3649  * that same header being repeated between serialized objects obtained through
3650  * recursion, until we reach flags indicating no recursion, at which point
3651  * we know we've resynchronized with a single layout, after <flags>.
3652  *
3653  * When storing a blessed ref to a tied variable, the following format is
3654  * used:
3655  *
3656  *     SX_HOOK <flags> <extra> ... [<len3> <object-IDs>] <magic object>
3657  *
3658  * The first <flags> indication carries an object of type SHT_EXTRA, and the
3659  * real object type is held in the <extra> flag.  At the very end of the
3660  * serialization stream, the underlying magic object is serialized, just like
3661  * any other tied variable.
3662  */
store_hook(pTHX_ stcxt_t * cxt,SV * sv,int type,HV * pkg,SV * hook)3663 static int store_hook(
3664                       pTHX_
3665                       stcxt_t *cxt,
3666                       SV *sv,
3667                       int type,
3668                       HV *pkg,
3669                       SV *hook)
3670 {
3671     I32 len;
3672     char *classname;
3673     STRLEN len2;
3674     SV *ref;
3675     AV *av;
3676     SV **ary;
3677     IV count;			/* really len3 + 1 */
3678     unsigned char flags;
3679     char *pv;
3680     int i;
3681     int recursed = 0;		/* counts recursion */
3682     int obj_type;		/* object type, on 2 bits */
3683     I32 classnum;
3684     int ret;
3685     int clone = cxt->optype & ST_CLONE;
3686     char mtype = '\0';		/* for blessed ref to tied structures */
3687     unsigned char eflags = '\0'; /* used when object type is SHT_EXTRA */
3688 #ifdef HAS_U64
3689     int need_large_oids = 0;
3690 #endif
3691 
3692     TRACEME(("store_hook, classname \"%s\", tagged #%d", HvNAME_get(pkg), (int)cxt->tagnum));
3693 
3694     /*
3695      * Determine object type on 2 bits.
3696      */
3697 
3698     switch (type) {
3699     case svis_REF:
3700     case svis_SCALAR:
3701         obj_type = SHT_SCALAR;
3702         break;
3703     case svis_ARRAY:
3704         obj_type = SHT_ARRAY;
3705         break;
3706     case svis_HASH:
3707         obj_type = SHT_HASH;
3708         break;
3709     case svis_TIED:
3710         /*
3711          * Produced by a blessed ref to a tied data structure, $o in the
3712          * following Perl code.
3713          *
3714          * 	my %h;
3715          *  tie %h, 'FOO';
3716          *	my $o = bless \%h, 'BAR';
3717          *
3718          * Signal the tie-ing magic by setting the object type as SHT_EXTRA
3719          * (since we have only 2 bits in <flags> to store the type), and an
3720          * <extra> byte flag will be emitted after the FIRST <flags> in the
3721          * stream, carrying what we put in 'eflags'.
3722          */
3723         obj_type = SHT_EXTRA;
3724         switch (SvTYPE(sv)) {
3725         case SVt_PVHV:
3726             eflags = (unsigned char) SHT_THASH;
3727             mtype = 'P';
3728             break;
3729         case SVt_PVAV:
3730             eflags = (unsigned char) SHT_TARRAY;
3731             mtype = 'P';
3732             break;
3733         default:
3734             eflags = (unsigned char) SHT_TSCALAR;
3735             mtype = 'q';
3736             break;
3737         }
3738         break;
3739     default:
3740         CROAK(("Unexpected object type (%d) in store_hook()", type));
3741     }
3742     flags = SHF_NEED_RECURSE | obj_type;
3743 
3744     classname = HvNAME_get(pkg);
3745     len = strlen(classname);
3746 
3747     /*
3748      * To call the hook, we need to fake a call like:
3749      *
3750      *    $object->STORABLE_freeze($cloning);
3751      *
3752      * but we don't have the $object here.  For instance, if $object is
3753      * a blessed array, what we have in 'sv' is the array, and we can't
3754      * call a method on those.
3755      *
3756      * Therefore, we need to create a temporary reference to the object and
3757      * make the call on that reference.
3758      */
3759 
3760     TRACEME(("about to call STORABLE_freeze on class %s", classname));
3761 
3762     ref = newRV_inc(sv);		/* Temporary reference */
3763     av = array_call(aTHX_ ref, hook, clone); /* @a = $object->STORABLE_freeze($c) */
3764     SvREFCNT_dec(ref);			/* Reclaim temporary reference */
3765 
3766     count = AvFILLp(av) + 1;
3767     TRACEME(("store_hook, array holds %" IVdf " items", count));
3768 
3769     /*
3770      * If they return an empty list, it means they wish to ignore the
3771      * hook for this class (and not just this instance -- that's for them
3772      * to handle if they so wish).
3773      *
3774      * Simply disable the cached entry for the hook (it won't be recomputed
3775      * since it's present in the cache) and recurse to store_blessed().
3776      */
3777 
3778     if (!count) {
3779         /* free empty list returned by the hook */
3780         av_undef(av);
3781         sv_free((SV *) av);
3782 
3783         /*
3784          * They must not change their mind in the middle of a serialization.
3785          */
3786 
3787         if (hv_fetch(cxt->hclass, classname, len, FALSE))
3788             CROAK(("Too late to ignore hooks for %s class \"%s\"",
3789                    (cxt->optype & ST_CLONE) ? "cloning" : "storing",
3790                    classname));
3791 
3792         pkg_hide(aTHX_ cxt->hook, pkg, "STORABLE_freeze");
3793 
3794         ASSERT(!pkg_can(aTHX_ cxt->hook, pkg, "STORABLE_freeze"),
3795                ("hook invisible"));
3796         TRACEME(("ignoring STORABLE_freeze in class \"%s\"", classname));
3797 
3798         return store_blessed(aTHX_ cxt, sv, type, pkg);
3799     }
3800 
3801     /*
3802      * Get frozen string.
3803      */
3804 
3805     ary = AvARRAY(av);
3806     pv = SvPV(ary[0], len2);
3807     /* We can't use pkg_can here because it only caches one method per
3808      * package */
3809     {
3810         GV* gv = gv_fetchmethod_autoload(pkg, "STORABLE_attach", FALSE);
3811         if (gv && isGV(gv)) {
3812             if (count > 1)
3813                 CROAK(("Freeze cannot return references if %s class is using STORABLE_attach", classname));
3814             goto check_done;
3815         }
3816     }
3817 
3818 #ifdef HAS_U64
3819     if (count > I32_MAX) {
3820 	CROAK(("Too many references returned by STORABLE_freeze()"));
3821     }
3822 #endif
3823 
3824     /*
3825      * If they returned more than one item, we need to serialize some
3826      * extra references if not already done.
3827      *
3828      * Loop over the array, starting at position #1, and for each item,
3829      * ensure it is a reference, serialize it if not already done, and
3830      * replace the entry with the tag ID of the corresponding serialized
3831      * object.
3832      *
3833      * We CHEAT by not calling av_fetch() and read directly within the
3834      * array, for speed.
3835      */
3836 
3837     for (i = 1; i < count; i++) {
3838 #ifdef USE_PTR_TABLE
3839         char *fake_tag;
3840 #else
3841         SV **svh;
3842 #endif
3843         SV *rsv = ary[i];
3844         SV *xsv;
3845         SV *tag;
3846         AV *av_hook = cxt->hook_seen;
3847 
3848         if (!SvROK(rsv))
3849             CROAK(("Item #%d returned by STORABLE_freeze "
3850                    "for %s is not a reference", (int)i, classname));
3851         xsv = SvRV(rsv);	/* Follow ref to know what to look for */
3852 
3853         /*
3854          * Look in hseen and see if we have a tag already.
3855          * Serialize entry if not done already, and get its tag.
3856          */
3857 
3858 #ifdef USE_PTR_TABLE
3859         /* Fakery needed because ptr_table_fetch returns zero for a
3860            failure, whereas the existing code assumes that it can
3861            safely store a tag zero. So for ptr_tables we store tag+1
3862         */
3863         if ((fake_tag = (char *)ptr_table_fetch(cxt->pseen, xsv)))
3864             goto sv_seen;	/* Avoid moving code too far to the right */
3865 #else
3866         if ((svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE)))
3867             goto sv_seen;	/* Avoid moving code too far to the right */
3868 #endif
3869 
3870         TRACEME(("listed object %d at 0x%" UVxf " is unknown", i-1,
3871                  PTR2UV(xsv)));
3872 
3873         /*
3874          * We need to recurse to store that object and get it to be known
3875          * so that we can resolve the list of object-IDs at retrieve time.
3876          *
3877          * The first time we do this, we need to emit the proper header
3878          * indicating that we recursed, and what the type of object is (the
3879          * object we're storing via a user-hook).  Indeed, during retrieval,
3880          * we'll have to create the object before recursing to retrieve the
3881          * others, in case those would point back at that object.
3882          */
3883 
3884         /* [SX_HOOK] <flags> [<extra>] <object>*/
3885         if (!recursed++) {
3886 #ifdef HAS_U64
3887             if (len2 > INT32_MAX)
3888                 PUTMARK(SX_LOBJECT);
3889 #endif
3890 	    PUTMARK(SX_HOOK);
3891             PUTMARK(flags);
3892             if (obj_type == SHT_EXTRA)
3893                 PUTMARK(eflags);
3894         } else
3895             PUTMARK(flags);
3896 
3897         if ((ret = store(aTHX_ cxt, xsv)))	/* Given by hook for us to store */
3898             return ret;
3899 
3900 #ifdef USE_PTR_TABLE
3901         fake_tag = (char *)ptr_table_fetch(cxt->pseen, xsv);
3902         if (!fake_tag)
3903             CROAK(("Could not serialize item #%d from hook in %s",
3904                    (int)i, classname));
3905 #else
3906         svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE);
3907         if (!svh)
3908             CROAK(("Could not serialize item #%d from hook in %s",
3909                    (int)i, classname));
3910 #endif
3911         /*
3912          * It was the first time we serialized 'xsv'.
3913          *
3914          * Keep this SV alive until the end of the serialization: if we
3915          * disposed of it right now by decrementing its refcount, and it was
3916          * a temporary value, some next temporary value allocated during
3917          * another STORABLE_freeze might take its place, and we'd wrongly
3918          * assume that new SV was already serialized, based on its presence
3919          * in cxt->hseen.
3920          *
3921          * Therefore, push it away in cxt->hook_seen.
3922          */
3923 
3924         av_store(av_hook, AvFILLp(av_hook)+1, SvREFCNT_inc(xsv));
3925 
3926     sv_seen:
3927         /*
3928          * Dispose of the REF they returned.  If we saved the 'xsv' away
3929          * in the array of returned SVs, that will not cause the underlying
3930          * referenced SV to be reclaimed.
3931          */
3932 
3933         ASSERT(SvREFCNT(xsv) > 1, ("SV will survive disposal of its REF"));
3934         SvREFCNT_dec(rsv);		/* Dispose of reference */
3935 
3936         /*
3937          * Replace entry with its tag (not a real SV, so no refcnt increment)
3938          */
3939 
3940 #ifdef USE_PTR_TABLE
3941         tag = (SV *)--fake_tag;
3942 #else
3943         tag = *svh;
3944 #endif
3945         ary[i] = tag;
3946         TRACEME(("listed object %d at 0x%" UVxf " is tag #%" UVuf,
3947                  i-1, PTR2UV(xsv), PTR2UV(tag)));
3948 #ifdef HAS_U64
3949        if ((U32)PTR2TAG(tag) != PTR2TAG(tag))
3950            need_large_oids = 1;
3951 #endif
3952     }
3953 
3954     /*
3955      * Allocate a class ID if not already done.
3956      *
3957      * This needs to be done after the recursion above, since at retrieval
3958      * time, we'll see the inner objects first.  Many thanks to
3959      * Salvador Ortiz Garcia <sog@msg.com.mx> who spot that bug and
3960      * proposed the right fix.  -- RAM, 15/09/2000
3961      */
3962 
3963  check_done:
3964     if (!known_class(aTHX_ cxt, classname, len, &classnum)) {
3965         TRACEME(("first time we see class %s, ID = %d", classname, (int)classnum));
3966         classnum = -1;			/* Mark: we must store classname */
3967     } else {
3968         TRACEME(("already seen class %s, ID = %d", classname, (int)classnum));
3969     }
3970 
3971     /*
3972      * Compute leading flags.
3973      */
3974 
3975     flags = obj_type;
3976     if (((classnum == -1) ? len : classnum) > LG_SCALAR)
3977         flags |= SHF_LARGE_CLASSLEN;
3978     if (classnum != -1)
3979         flags |= SHF_IDX_CLASSNAME;
3980     if (len2 > LG_SCALAR)
3981         flags |= SHF_LARGE_STRLEN;
3982     if (count > 1)
3983         flags |= SHF_HAS_LIST;
3984     if (count > (LG_SCALAR + 1))
3985         flags |= SHF_LARGE_LISTLEN;
3986 #ifdef HAS_U64
3987     if (need_large_oids)
3988         flags |= SHF_LARGE_LISTLEN;
3989 #endif
3990 
3991     /*
3992      * We're ready to emit either serialized form:
3993      *
3994      *   SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
3995      *   SX_HOOK <flags> <index>           <len2> <str> [<len3> <object-IDs>]
3996      *
3997      * If we recursed, the SX_HOOK has already been emitted.
3998      */
3999 
4000     TRACEME(("SX_HOOK (recursed=%d) flags=0x%x "
4001              "class=%" IVdf " len=%" IVdf " len2=%" IVdf " len3=%" IVdf,
4002              recursed, flags, (IV)classnum, (IV)len, (IV)len2, count-1));
4003 
4004     /* SX_HOOK <flags> [<extra>] */
4005     if (!recursed) {
4006 #ifdef HAS_U64
4007         if (len2 > INT32_MAX)
4008 	    PUTMARK(SX_LOBJECT);
4009 #endif
4010 	PUTMARK(SX_HOOK);
4011         PUTMARK(flags);
4012         if (obj_type == SHT_EXTRA)
4013             PUTMARK(eflags);
4014     } else
4015         PUTMARK(flags);
4016 
4017     /* <len> <classname> or <index> */
4018     if (flags & SHF_IDX_CLASSNAME) {
4019         if (flags & SHF_LARGE_CLASSLEN)
4020             WLEN(classnum);
4021         else {
4022             unsigned char cnum = (unsigned char) classnum;
4023             PUTMARK(cnum);
4024         }
4025     } else {
4026         if (flags & SHF_LARGE_CLASSLEN)
4027             WLEN(len);
4028         else {
4029             unsigned char clen = (unsigned char) len;
4030             PUTMARK(clen);
4031         }
4032         WRITE(classname, len);		/* Final \0 is omitted */
4033     }
4034 
4035     /* <len2> <frozen-str> */
4036 #ifdef HAS_U64
4037     if (len2 > INT32_MAX) {
4038         W64LEN(len2);
4039     }
4040     else
4041 #endif
4042     if (flags & SHF_LARGE_STRLEN) {
4043         U32 wlen2 = len2;		/* STRLEN might be 8 bytes */
4044         WLEN(wlen2);			/* Must write an I32 for 64-bit machines */
4045     } else {
4046         unsigned char clen = (unsigned char) len2;
4047         PUTMARK(clen);
4048     }
4049     if (len2)
4050         WRITE(pv, (SSize_t)len2);	/* Final \0 is omitted */
4051 
4052     /* [<len3> <object-IDs>] */
4053     if (flags & SHF_HAS_LIST) {
4054         int len3 = count - 1;
4055         if (flags & SHF_LARGE_LISTLEN) {
4056 #ifdef HAS_U64
4057   	    int tlen3 = need_large_oids ? -len3 : len3;
4058 	    WLEN(tlen3);
4059 #else
4060             WLEN(len3);
4061 #endif
4062 	}
4063         else {
4064             unsigned char clen = (unsigned char) len3;
4065             PUTMARK(clen);
4066         }
4067 
4068         /*
4069          * NOTA BENE, for 64-bit machines: the ary[i] below does not yield a
4070          * real pointer, rather a tag number, well under the 32-bit limit.
4071          * Which is wrong... if we have more than 2**32 SVs we can get ids over
4072          * the 32-bit limit.
4073          */
4074 
4075         for (i = 1; i < count; i++) {
4076 #ifdef HAS_U64
4077             if (need_large_oids) {
4078                 ntag_t tag = PTR2TAG(ary[i]);
4079                 W64LEN(tag);
4080                 TRACEME(("object %d, tag #%" UVuf, i-1, (UV)tag));
4081             }
4082             else
4083 #endif
4084             {
4085                 I32 tagval = htonl(LOW_32BITS(ary[i]));
4086                 WRITE_I32(tagval);
4087                 TRACEME(("object %d, tag #%d", i-1, ntohl(tagval)));
4088             }
4089         }
4090     }
4091 
4092     /*
4093      * Free the array.  We need extra care for indices after 0, since they
4094      * don't hold real SVs but integers cast.
4095      */
4096 
4097     if (count > 1)
4098         AvFILLp(av) = 0;	/* Cheat, nothing after 0 interests us */
4099     av_undef(av);
4100     sv_free((SV *) av);
4101 
4102     /*
4103      * If object was tied, need to insert serialization of the magic object.
4104      */
4105 
4106     if (obj_type == SHT_EXTRA) {
4107         MAGIC *mg;
4108 
4109         if (!(mg = mg_find(sv, mtype))) {
4110             int svt = SvTYPE(sv);
4111             CROAK(("No magic '%c' found while storing ref to tied %s with hook",
4112                    mtype, (svt == SVt_PVHV) ? "hash" :
4113                    (svt == SVt_PVAV) ? "array" : "scalar"));
4114         }
4115 
4116         TRACEME(("handling the magic object 0x%" UVxf " part of 0x%" UVxf,
4117                  PTR2UV(mg->mg_obj), PTR2UV(sv)));
4118 
4119         /*
4120          * [<magic object>]
4121          */
4122         if ((ret = store(aTHX_ cxt, mg->mg_obj)))
4123             return ret;
4124     }
4125 
4126     return 0;
4127 }
4128 
4129 /*
4130  * store_blessed	-- dispatched manually, not via sv_store[]
4131  *
4132  * Check whether there is a STORABLE_xxx hook defined in the class or in one
4133  * of its ancestors.  If there is, then redispatch to store_hook();
4134  *
4135  * Otherwise, the blessed SV is stored using the following layout:
4136  *
4137  *    SX_BLESS <flag> <len> <classname> <object>
4138  *
4139  * where <flag> indicates whether <len> is stored on 0 or 4 bytes, depending
4140  * on the high-order bit in flag: if 1, then length follows on 4 bytes.
4141  * Otherwise, the low order bits give the length, thereby giving a compact
4142  * representation for class names less than 127 chars long.
4143  *
4144  * Each <classname> seen is remembered and indexed, so that the next time
4145  * an object in the blessed in the same <classname> is stored, the following
4146  * will be emitted:
4147  *
4148  *    SX_IX_BLESS <flag> <index> <object>
4149  *
4150  * where <index> is the classname index, stored on 0 or 4 bytes depending
4151  * on the high-order bit in flag (same encoding as above for <len>).
4152  */
store_blessed(pTHX_ stcxt_t * cxt,SV * sv,int type,HV * pkg)4153 static int store_blessed(
4154                          pTHX_
4155                          stcxt_t *cxt,
4156                          SV *sv,
4157                          int type,
4158                          HV *pkg)
4159 {
4160     SV *hook;
4161     char *classname;
4162     I32 len;
4163     I32 classnum;
4164 
4165     TRACEME(("store_blessed, type %d, class \"%s\"", type, HvNAME_get(pkg)));
4166 
4167     /*
4168      * Look for a hook for this blessed SV and redirect to store_hook()
4169      * if needed.
4170      */
4171 
4172     hook = pkg_can(aTHX_ cxt->hook, pkg, "STORABLE_freeze");
4173     if (hook)
4174         return store_hook(aTHX_ cxt, sv, type, pkg, hook);
4175 
4176     /*
4177      * This is a blessed SV without any serialization hook.
4178      */
4179 
4180     classname = HvNAME_get(pkg);
4181     len = strlen(classname);
4182 
4183     TRACEME(("blessed 0x%" UVxf " in %s, no hook: tagged #%d",
4184              PTR2UV(sv), classname, (int)cxt->tagnum));
4185 
4186     /*
4187      * Determine whether it is the first time we see that class name (in which
4188      * case it will be stored in the SX_BLESS form), or whether we already
4189      * saw that class name before (in which case the SX_IX_BLESS form will be
4190      * used).
4191      */
4192 
4193     if (known_class(aTHX_ cxt, classname, len, &classnum)) {
4194         TRACEME(("already seen class %s, ID = %d", classname, (int)classnum));
4195         PUTMARK(SX_IX_BLESS);
4196         if (classnum <= LG_BLESS) {
4197             unsigned char cnum = (unsigned char) classnum;
4198             PUTMARK(cnum);
4199         } else {
4200             unsigned char flag = (unsigned char) 0x80;
4201             PUTMARK(flag);
4202             WLEN(classnum);
4203         }
4204     } else {
4205         TRACEME(("first time we see class %s, ID = %d", classname,
4206                  (int)classnum));
4207         PUTMARK(SX_BLESS);
4208         if (len <= LG_BLESS) {
4209             unsigned char clen = (unsigned char) len;
4210             PUTMARK(clen);
4211         } else {
4212             unsigned char flag = (unsigned char) 0x80;
4213             PUTMARK(flag);
4214             WLEN(len);	/* Don't BER-encode, this should be rare */
4215         }
4216         WRITE(classname, len);	/* Final \0 is omitted */
4217     }
4218 
4219     /*
4220      * Now emit the <object> part.
4221      */
4222 
4223     return SV_STORE(type)(aTHX_ cxt, sv);
4224 }
4225 
4226 /*
4227  * store_other
4228  *
4229  * We don't know how to store the item we reached, so return an error condition.
4230  * (it's probably a GLOB, some CODE reference, etc...)
4231  *
4232  * If they defined the 'forgive_me' variable at the Perl level to some
4233  * true value, then don't croak, just warn, and store a placeholder string
4234  * instead.
4235  */
store_other(pTHX_ stcxt_t * cxt,SV * sv)4236 static int store_other(pTHX_ stcxt_t *cxt, SV *sv)
4237 {
4238     STRLEN len;
4239     char buf[80];
4240 
4241     TRACEME(("store_other"));
4242 
4243     /*
4244      * Fetch the value from perl only once per store() operation.
4245      */
4246 
4247     if (
4248         cxt->forgive_me == 0 ||
4249         (cxt->forgive_me < 0 &&
4250          !(cxt->forgive_me = SvTRUE
4251            (get_sv("Storable::forgive_me", GV_ADD)) ? 1 : 0))
4252 	)
4253         CROAK(("Can't store %s items", sv_reftype(sv, FALSE)));
4254 
4255     warn("Can't store item %s(0x%" UVxf ")",
4256          sv_reftype(sv, FALSE), PTR2UV(sv));
4257 
4258     /*
4259      * Store placeholder string as a scalar instead...
4260      */
4261 
4262     (void) sprintf(buf, "You lost %s(0x%" UVxf ")%c", sv_reftype(sv, FALSE),
4263                    PTR2UV(sv), (char) 0);
4264 
4265     len = strlen(buf);
4266     if (len < 80)
4267         STORE_SCALAR(buf, len);
4268     TRACEME(("ok (dummy \"%s\", length = %" IVdf ")", buf, (IV) len));
4269 
4270     return 0;
4271 }
4272 
4273 /***
4274  *** Store driving routines
4275  ***/
4276 
4277 /*
4278  * sv_type
4279  *
4280  * WARNING: partially duplicates Perl's sv_reftype for speed.
4281  *
4282  * Returns the type of the SV, identified by an integer. That integer
4283  * may then be used to index the dynamic routine dispatch table.
4284  */
sv_type(pTHX_ SV * sv)4285 static int sv_type(pTHX_ SV *sv)
4286 {
4287     switch (SvTYPE(sv)) {
4288     case SVt_NULL:
4289 #if PERL_VERSION <= 10
4290     case SVt_IV:
4291 #endif
4292     case SVt_NV:
4293         /*
4294          * No need to check for ROK, that can't be set here since there
4295          * is no field capable of hodling the xrv_rv reference.
4296          */
4297         return svis_SCALAR;
4298     case SVt_PV:
4299 #if PERL_VERSION <= 10
4300     case SVt_RV:
4301 #else
4302     case SVt_IV:
4303 #endif
4304     case SVt_PVIV:
4305     case SVt_PVNV:
4306         /*
4307          * Starting from SVt_PV, it is possible to have the ROK flag
4308          * set, the pointer to the other SV being either stored in
4309          * the xrv_rv (in the case of a pure SVt_RV), or as the
4310          * xpv_pv field of an SVt_PV and its heirs.
4311          *
4312          * However, those SV cannot be magical or they would be an
4313          * SVt_PVMG at least.
4314          */
4315         return SvROK(sv) ? svis_REF : svis_SCALAR;
4316     case SVt_PVMG:
4317 #if PERL_VERSION <= 10
4318         if ((SvFLAGS(sv) & (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG))
4319 	          == (SVs_OBJECT|BFD_Svs_SMG_OR_RMG)
4320 	    && mg_find(sv, PERL_MAGIC_qr)) {
4321 	      return svis_REGEXP;
4322 	}
4323 #endif
4324     case SVt_PVLV:		/* Workaround for perl5.004_04 "LVALUE" bug */
4325         if ((SvFLAGS(sv) & (SVs_GMG|SVs_SMG|SVs_RMG)) ==
4326             (SVs_GMG|SVs_SMG|SVs_RMG) &&
4327             (mg_find(sv, 'p')))
4328             return svis_TIED_ITEM;
4329         /* FALL THROUGH */
4330 #if PERL_VERSION < 9
4331     case SVt_PVBM:
4332 #endif
4333         if ((SvFLAGS(sv) & (SVs_GMG|SVs_SMG|SVs_RMG)) ==
4334             (SVs_GMG|SVs_SMG|SVs_RMG) &&
4335             (mg_find(sv, 'q')))
4336             return svis_TIED;
4337         return SvROK(sv) ? svis_REF : svis_SCALAR;
4338     case SVt_PVAV:
4339         if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
4340             return svis_TIED;
4341         return svis_ARRAY;
4342     case SVt_PVHV:
4343         if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
4344             return svis_TIED;
4345         return svis_HASH;
4346     case SVt_PVCV:
4347         return svis_CODE;
4348 #if PERL_VERSION > 8
4349 	/* case SVt_INVLIST: */
4350 #endif
4351 #if PERL_VERSION > 10
4352     case SVt_REGEXP:
4353         return svis_REGEXP;
4354 #endif
4355     default:
4356         break;
4357     }
4358 
4359     return svis_OTHER;
4360 }
4361 
4362 /*
4363  * store
4364  *
4365  * Recursively store objects pointed to by the sv to the specified file.
4366  *
4367  * Layout is <content> or SX_OBJECT <tagnum> if we reach an already stored
4368  * object (one for which storage has started -- it may not be over if we have
4369  * a self-referenced structure). This data set forms a stored <object>.
4370  */
store(pTHX_ stcxt_t * cxt,SV * sv)4371 static int store(pTHX_ stcxt_t *cxt, SV *sv)
4372 {
4373     SV **svh;
4374     int ret;
4375     int type;
4376 #ifdef USE_PTR_TABLE
4377     struct ptr_tbl *pseen = cxt->pseen;
4378 #else
4379     HV *hseen = cxt->hseen;
4380 #endif
4381 
4382     TRACEME(("store (0x%" UVxf ")", PTR2UV(sv)));
4383 
4384     /*
4385      * If object has already been stored, do not duplicate data.
4386      * Simply emit the SX_OBJECT marker followed by its tag data.
4387      * The tag is always written in network order.
4388      *
4389      * NOTA BENE, for 64-bit machines: the "*svh" below does not yield a
4390      * real pointer, rather a tag number (watch the insertion code below).
4391      * That means it probably safe to assume it is well under the 32-bit
4392      * limit, and makes the truncation safe.
4393      *		-- RAM, 14/09/1999
4394      */
4395 
4396 #ifdef USE_PTR_TABLE
4397     svh = (SV **)ptr_table_fetch(pseen, sv);
4398 #else
4399     svh = hv_fetch(hseen, (char *) &sv, sizeof(sv), FALSE);
4400 #endif
4401     if (svh) {
4402 	ntag_t tagval;
4403 	if (sv == &PL_sv_undef) {
4404             /* We have seen PL_sv_undef before, but fake it as
4405                if we have not.
4406 
4407                Not the simplest solution to making restricted
4408                hashes work on 5.8.0, but it does mean that
4409                repeated references to the one true undef will
4410                take up less space in the output file.
4411             */
4412             /* Need to jump past the next hv_store, because on the
4413                second store of undef the old hash value will be
4414                SvREFCNT_dec()ed, and as Storable cheats horribly
4415                by storing non-SVs in the hash a SEGV will ensure.
4416                Need to increase the tag number so that the
4417                receiver has no idea what games we're up to.  This
4418                special casing doesn't affect hooks that store
4419                undef, as the hook routine does its own lookup into
4420                hseen.  Also this means that any references back
4421                to PL_sv_undef (from the pathological case of hooks
4422                storing references to it) will find the seen hash
4423                entry for the first time, as if we didn't have this
4424                hackery here. (That hseen lookup works even on 5.8.0
4425                because it's a key of &PL_sv_undef and a value
4426                which is a tag number, not a value which is
4427                PL_sv_undef.)  */
4428             cxt->tagnum++;
4429             type = svis_SCALAR;
4430             goto undef_special_case;
4431         }
4432 
4433 #ifdef USE_PTR_TABLE
4434 	tagval = PTR2TAG(((char *)svh)-1);
4435 #else
4436 	tagval = PTR2TAG(*svh);
4437 #endif
4438 #ifdef HAS_U64
4439 
4440        /* older versions of Storable streat the tag as a signed value
4441           used in an array lookup, corrupting the data structure.
4442           Ensure only a newer Storable will be able to parse this tag id
4443           if it's over the 2G mark.
4444         */
4445 	if (tagval > I32_MAX) {
4446 
4447 	    TRACEME(("object 0x%" UVxf " seen as #%" UVuf, PTR2UV(sv),
4448 		     (UV)tagval));
4449 
4450 	    PUTMARK(SX_LOBJECT);
4451 	    PUTMARK(SX_OBJECT);
4452 	    W64LEN(tagval);
4453 	    return 0;
4454 	}
4455 	else
4456 #endif
4457 	{
4458 	    I32 ltagval;
4459 
4460 	    ltagval = htonl((I32)tagval);
4461 
4462 	    TRACEME(("object 0x%" UVxf " seen as #%d", PTR2UV(sv),
4463 		     ntohl(ltagval)));
4464 
4465 	    PUTMARK(SX_OBJECT);
4466 	    WRITE_I32(ltagval);
4467 	    return 0;
4468 	}
4469     }
4470 
4471     /*
4472      * Allocate a new tag and associate it with the address of the sv being
4473      * stored, before recursing...
4474      *
4475      * In order to avoid creating new SvIVs to hold the tagnum we just
4476      * cast the tagnum to an SV pointer and store that in the hash.  This
4477      * means that we must clean up the hash manually afterwards, but gives
4478      * us a 15% throughput increase.
4479      *
4480      */
4481 
4482     cxt->tagnum++;
4483 #ifdef USE_PTR_TABLE
4484     ptr_table_store(pseen, sv, INT2PTR(SV*, 1 + cxt->tagnum));
4485 #else
4486     if (!hv_store(hseen,
4487                   (char *) &sv, sizeof(sv), INT2PTR(SV*, cxt->tagnum), 0))
4488         return -1;
4489 #endif
4490 
4491     /*
4492      * Store 'sv' and everything beneath it, using appropriate routine.
4493      * Abort immediately if we get a non-zero status back.
4494      */
4495 
4496     type = sv_type(aTHX_ sv);
4497 
4498  undef_special_case:
4499     TRACEME(("storing 0x%" UVxf " tag #%d, type %d...",
4500              PTR2UV(sv), (int)cxt->tagnum, (int)type));
4501 
4502     if (SvOBJECT(sv)) {
4503         HV *pkg = SvSTASH(sv);
4504         ret = store_blessed(aTHX_ cxt, sv, type, pkg);
4505     } else
4506         ret = SV_STORE(type)(aTHX_ cxt, sv);
4507 
4508     TRACEME(("%s (stored 0x%" UVxf ", refcnt=%d, %s)",
4509              ret ? "FAILED" : "ok", PTR2UV(sv),
4510              (int)SvREFCNT(sv), sv_reftype(sv, FALSE)));
4511 
4512     return ret;
4513 }
4514 
4515 /*
4516  * magic_write
4517  *
4518  * Write magic number and system information into the file.
4519  * Layout is <magic> <network> [<len> <byteorder> <sizeof int> <sizeof long>
4520  * <sizeof ptr>] where <len> is the length of the byteorder hexa string.
4521  * All size and lengths are written as single characters here.
4522  *
4523  * Note that no byte ordering info is emitted when <network> is true, since
4524  * integers will be emitted in network order in that case.
4525  */
magic_write(pTHX_ stcxt_t * cxt)4526 static int magic_write(pTHX_ stcxt_t *cxt)
4527 {
4528     /*
4529      * Starting with 0.6, the "use_network_order" byte flag is also used to
4530      * indicate the version number of the binary image, encoded in the upper
4531      * bits. The bit 0 is always used to indicate network order.
4532      */
4533     /*
4534      * Starting with 0.7, a full byte is dedicated to the minor version of
4535      * the binary format, which is incremented only when new markers are
4536      * introduced, for instance, but when backward compatibility is preserved.
4537      */
4538 
4539     /* Make these at compile time.  The WRITE() macro is sufficiently complex
4540        that it saves about 200 bytes doing it this way and only using it
4541        once.  */
4542     static const unsigned char network_file_header[] = {
4543         MAGICSTR_BYTES,
4544         (STORABLE_BIN_MAJOR << 1) | 1,
4545         STORABLE_BIN_WRITE_MINOR
4546     };
4547     static const unsigned char file_header[] = {
4548         MAGICSTR_BYTES,
4549         (STORABLE_BIN_MAJOR << 1) | 0,
4550         STORABLE_BIN_WRITE_MINOR,
4551         /* sizeof the array includes the 0 byte at the end:  */
4552         (char) sizeof (byteorderstr) - 1,
4553         BYTEORDER_BYTES,
4554         (unsigned char) sizeof(int),
4555         (unsigned char) sizeof(long),
4556         (unsigned char) sizeof(char *),
4557         (unsigned char) sizeof(NV)
4558     };
4559 #ifdef USE_56_INTERWORK_KLUDGE
4560     static const unsigned char file_header_56[] = {
4561         MAGICSTR_BYTES,
4562         (STORABLE_BIN_MAJOR << 1) | 0,
4563         STORABLE_BIN_WRITE_MINOR,
4564         /* sizeof the array includes the 0 byte at the end:  */
4565         (char) sizeof (byteorderstr_56) - 1,
4566         BYTEORDER_BYTES_56,
4567         (unsigned char) sizeof(int),
4568         (unsigned char) sizeof(long),
4569         (unsigned char) sizeof(char *),
4570         (unsigned char) sizeof(NV)
4571     };
4572 #endif
4573     const unsigned char *header;
4574     SSize_t length;
4575 
4576     TRACEME(("magic_write on fd=%d", cxt->fio ? PerlIO_fileno(cxt->fio) : -1));
4577 
4578     if (cxt->netorder) {
4579         header = network_file_header;
4580         length = sizeof (network_file_header);
4581     } else {
4582 #ifdef USE_56_INTERWORK_KLUDGE
4583         if (SvTRUE(get_sv("Storable::interwork_56_64bit", GV_ADD))) {
4584             header = file_header_56;
4585             length = sizeof (file_header_56);
4586         } else
4587 #endif
4588             {
4589                 header = file_header;
4590                 length = sizeof (file_header);
4591             }
4592     }
4593 
4594     if (!cxt->fio) {
4595         /* sizeof the array includes the 0 byte at the end.  */
4596         header += sizeof (magicstr) - 1;
4597         length -= sizeof (magicstr) - 1;
4598     }
4599 
4600     WRITE( (unsigned char*) header, length);
4601 
4602     if (!cxt->netorder) {
4603         TRACEME(("ok (magic_write byteorder = 0x%lx [%d], I%d L%d P%d D%d)",
4604                  (unsigned long) BYTEORDER, (int) sizeof (byteorderstr) - 1,
4605                  (int) sizeof(int), (int) sizeof(long),
4606                  (int) sizeof(char *), (int) sizeof(NV)));
4607     }
4608     return 0;
4609 }
4610 
4611 /*
4612  * do_store
4613  *
4614  * Common code for store operations.
4615  *
4616  * When memory store is requested (f = NULL) and a non null SV* is given in
4617  * 'res', it is filled with a new SV created out of the memory buffer.
4618  *
4619  * It is required to provide a non-null 'res' when the operation type is not
4620  * dclone() and store() is performed to memory.
4621  */
do_store(pTHX_ PerlIO * f,SV * sv,int optype,int network_order,SV ** res)4622 static int do_store(pTHX_
4623 	PerlIO *f,
4624         SV *sv,
4625         int optype,
4626         int network_order,
4627         SV **res)
4628 {
4629     dSTCXT;
4630     int status;
4631 
4632     ASSERT(!(f == 0 && !(optype & ST_CLONE)) || res,
4633            ("must supply result SV pointer for real recursion to memory"));
4634 
4635     TRACEMED(("do_store (optype=%d, netorder=%d)",
4636              optype, network_order));
4637 
4638     optype |= ST_STORE;
4639 
4640     /*
4641      * Workaround for CROAK leak: if they enter with a "dirty" context,
4642      * free up memory for them now.
4643      */
4644 
4645     assert(cxt);
4646     if (cxt->s_dirty)
4647         clean_context(aTHX_ cxt);
4648 
4649     /*
4650      * Now that STORABLE_xxx hooks exist, it is possible that they try to
4651      * re-enter store() via the hooks.  We need to stack contexts.
4652      */
4653 
4654     if (cxt->entry)
4655         cxt = allocate_context(aTHX_ cxt);
4656 
4657     INIT_TRACEME;
4658 
4659     cxt->entry++;
4660 
4661     ASSERT(cxt->entry == 1, ("starting new recursion"));
4662     ASSERT(!cxt->s_dirty, ("clean context"));
4663 
4664     /*
4665      * Ensure sv is actually a reference. From perl, we called something
4666      * like:
4667      *       pstore(aTHX_ FILE, \@array);
4668      * so we must get the scalar value behind that reference.
4669      */
4670 
4671     if (!SvROK(sv))
4672         CROAK(("Not a reference"));
4673     sv = SvRV(sv);		/* So follow it to know what to store */
4674 
4675     /*
4676      * If we're going to store to memory, reset the buffer.
4677      */
4678 
4679     if (!f)
4680         MBUF_INIT(0);
4681 
4682     /*
4683      * Prepare context and emit headers.
4684      */
4685 
4686     init_store_context(aTHX_ cxt, f, optype, network_order);
4687 
4688     if (-1 == magic_write(aTHX_ cxt))	/* Emit magic and ILP info */
4689         return 0;			/* Error */
4690 
4691     /*
4692      * Recursively store object...
4693      */
4694 
4695     ASSERT(is_storing(aTHX), ("within store operation"));
4696 
4697     status = store(aTHX_ cxt, sv);	/* Just do it! */
4698 
4699     /*
4700      * If they asked for a memory store and they provided an SV pointer,
4701      * make an SV string out of the buffer and fill their pointer.
4702      *
4703      * When asking for ST_REAL, it's MANDATORY for the caller to provide
4704      * an SV, since context cleanup might free the buffer if we did recurse.
4705      * (unless caller is dclone(), which is aware of that).
4706      */
4707 
4708     if (!cxt->fio && res)
4709         *res = mbuf2sv(aTHX);
4710 
4711     TRACEME(("do_store returns %d", status));
4712 
4713     /*
4714      * Final cleanup.
4715      *
4716      * The "root" context is never freed, since it is meant to be always
4717      * handy for the common case where no recursion occurs at all (i.e.
4718      * we enter store() outside of any Storable code and leave it, period).
4719      * We know it's the "root" context because there's nothing stacked
4720      * underneath it.
4721      *
4722      * OPTIMIZATION:
4723      *
4724      * When deep cloning, we don't free the context: doing so would force
4725      * us to copy the data in the memory buffer.  Sicne we know we're
4726      * about to enter do_retrieve...
4727      */
4728 
4729     clean_store_context(aTHX_ cxt);
4730     if (cxt->prev && !(cxt->optype & ST_CLONE))
4731         free_context(aTHX_ cxt);
4732 
4733     return status == 0;
4734 }
4735 
4736 /***
4737  *** Memory stores.
4738  ***/
4739 
4740 /*
4741  * mbuf2sv
4742  *
4743  * Build a new SV out of the content of the internal memory buffer.
4744  */
mbuf2sv(pTHX)4745 static SV *mbuf2sv(pTHX)
4746 {
4747     dSTCXT;
4748 
4749     assert(cxt);
4750     return newSVpv(mbase, MBUF_SIZE());
4751 }
4752 
4753 /***
4754  *** Specific retrieve callbacks.
4755  ***/
4756 
4757 /*
4758  * retrieve_other
4759  *
4760  * Return an error via croak, since it is not possible that we get here
4761  * under normal conditions, when facing a file produced via pstore().
4762  */
retrieve_other(pTHX_ stcxt_t * cxt,const char * cname)4763 static SV *retrieve_other(pTHX_ stcxt_t *cxt, const char *cname)
4764 {
4765     PERL_UNUSED_ARG(cname);
4766     if (
4767         cxt->ver_major != STORABLE_BIN_MAJOR &&
4768         cxt->ver_minor != STORABLE_BIN_MINOR
4769 	) {
4770         CROAK(("Corrupted storable %s (binary v%d.%d), current is v%d.%d",
4771                cxt->fio ? "file" : "string",
4772                cxt->ver_major, cxt->ver_minor,
4773                STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
4774     } else {
4775         CROAK(("Corrupted storable %s (binary v%d.%d)",
4776                cxt->fio ? "file" : "string",
4777                cxt->ver_major, cxt->ver_minor));
4778     }
4779 
4780     return (SV *) 0;		/* Just in case */
4781 }
4782 
4783 /*
4784  * retrieve_idx_blessed
4785  *
4786  * Layout is SX_IX_BLESS <index> <object> with SX_IX_BLESS already read.
4787  * <index> can be coded on either 1 or 5 bytes.
4788  */
retrieve_idx_blessed(pTHX_ stcxt_t * cxt,const char * cname)4789 static SV *retrieve_idx_blessed(pTHX_ stcxt_t *cxt, const char *cname)
4790 {
4791     I32 idx;
4792     const char *classname;
4793     SV **sva;
4794     SV *sv;
4795 
4796     PERL_UNUSED_ARG(cname);
4797     TRACEME(("retrieve_idx_blessed (#%d)", (int)cxt->tagnum));
4798     ASSERT(!cname, ("no bless-into class given here, got %s", cname));
4799 
4800     GETMARK(idx);			/* Index coded on a single char? */
4801     if (idx & 0x80)
4802         RLEN(idx);
4803 
4804     /*
4805      * Fetch classname in 'aclass'
4806      */
4807 
4808     sva = av_fetch(cxt->aclass, idx, FALSE);
4809     if (!sva)
4810         CROAK(("Class name #%" IVdf " should have been seen already",
4811                (IV) idx));
4812 
4813     classname = SvPVX(*sva);	/* We know it's a PV, by construction */
4814 
4815     TRACEME(("class ID %d => %s", (int)idx, classname));
4816 
4817     /*
4818      * Retrieve object and bless it.
4819      */
4820 
4821     sv = retrieve(aTHX_ cxt, classname); /* First SV which is SEEN
4822                                             will be blessed */
4823 
4824     return sv;
4825 }
4826 
4827 /*
4828  * retrieve_blessed
4829  *
4830  * Layout is SX_BLESS <len> <classname> <object> with SX_BLESS already read.
4831  * <len> can be coded on either 1 or 5 bytes.
4832  */
retrieve_blessed(pTHX_ stcxt_t * cxt,const char * cname)4833 static SV *retrieve_blessed(pTHX_ stcxt_t *cxt, const char *cname)
4834 {
4835     U32 len;
4836     SV *sv;
4837     char buf[LG_BLESS + 1];		/* Avoid malloc() if possible */
4838     char *classname = buf;
4839     char *malloced_classname = NULL;
4840 
4841     PERL_UNUSED_ARG(cname);
4842     TRACEME(("retrieve_blessed (#%d)", (int)cxt->tagnum));
4843     ASSERT(!cname, ("no bless-into class given here, got %s", cname));
4844 
4845     /*
4846      * Decode class name length and read that name.
4847      *
4848      * Short classnames have two advantages: their length is stored on one
4849      * single byte, and the string can be read on the stack.
4850      */
4851 
4852     GETMARK(len);			/* Length coded on a single char? */
4853     if (len & 0x80) {
4854         RLEN(len);
4855         TRACEME(("** allocating %ld bytes for class name", (long)len+1));
4856         if (len > I32_MAX)
4857             CROAK(("Corrupted classname length %lu", (long)len));
4858         PL_nomemok = TRUE; /* handle error by ourselves */
4859         New(10003, classname, len+1, char);
4860         PL_nomemok = FALSE;
4861         if (!classname)
4862             CROAK(("Out of memory with len %ld", (long)len));
4863         PL_nomemok = FALSE;
4864         malloced_classname = classname;
4865     }
4866     SAFEPVREAD(classname, (I32)len, malloced_classname);
4867     classname[len] = '\0';		/* Mark string end */
4868 
4869     /*
4870      * It's a new classname, otherwise it would have been an SX_IX_BLESS.
4871      */
4872 
4873     TRACEME(("new class name \"%s\" will bear ID = %d", classname,
4874              (int)cxt->classnum));
4875 
4876     if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(classname, len))) {
4877         Safefree(malloced_classname);
4878         return (SV *) 0;
4879     }
4880 
4881     /*
4882      * Retrieve object and bless it.
4883      */
4884 
4885     sv = retrieve(aTHX_ cxt, classname); /* First SV which is SEEN will be blessed */
4886     if (malloced_classname)
4887         Safefree(malloced_classname);
4888 
4889     return sv;
4890 }
4891 
4892 /*
4893  * retrieve_hook
4894  *
4895  * Layout: SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
4896  * with leading mark already read, as usual.
4897  *
4898  * When recursion was involved during serialization of the object, there
4899  * is an unknown amount of serialized objects after the SX_HOOK mark.  Until
4900  * we reach a <flags> marker with the recursion bit cleared.
4901  *
4902  * If the first <flags> byte contains a type of SHT_EXTRA, then the real type
4903  * is held in the <extra> byte, and if the object is tied, the serialized
4904  * magic object comes at the very end:
4905  *
4906  *     SX_HOOK <flags> <extra> ... [<len3> <object-IDs>] <magic object>
4907  *
4908  * This means the STORABLE_thaw hook will NOT get a tied variable during its
4909  * processing (since we won't have seen the magic object by the time the hook
4910  * is called).  See comments below for why it was done that way.
4911  */
retrieve_hook_common(pTHX_ stcxt_t * cxt,const char * cname,int large)4912 static SV *retrieve_hook_common(pTHX_ stcxt_t *cxt, const char *cname, int large)
4913 {
4914     U32 len;
4915     char buf[LG_BLESS + 1];		/* Avoid malloc() if possible */
4916     char *classname = buf;
4917     unsigned int flags;
4918     STRLEN len2;
4919     SV *frozen;
4920     I32 len3 = 0;
4921     AV *av = 0;
4922     SV *hook;
4923     SV *sv;
4924     SV *rv;
4925     GV *attach;
4926     HV *stash;
4927     int obj_type;
4928     int clone = cxt->optype & ST_CLONE;
4929     char mtype = '\0';
4930     unsigned int extra_type = 0;
4931 #ifdef HAS_U64
4932     int has_large_oids = 0;
4933 #endif
4934 
4935     PERL_UNUSED_ARG(cname);
4936     TRACEME(("retrieve_hook (#%d)", (int)cxt->tagnum));
4937     ASSERT(!cname, ("no bless-into class given here, got %s", cname));
4938 
4939 #ifndef HAS_U64
4940     assert(!large);
4941     PERL_UNUSED_ARG(large);
4942 #endif
4943 
4944     /*
4945      * Read flags, which tell us about the type, and whether we need
4946      * to recurse.
4947      */
4948 
4949     GETMARK(flags);
4950 
4951     /*
4952      * Create the (empty) object, and mark it as seen.
4953      *
4954      * This must be done now, because tags are incremented, and during
4955      * serialization, the object tag was affected before recursion could
4956      * take place.
4957      */
4958 
4959     obj_type = flags & SHF_TYPE_MASK;
4960     switch (obj_type) {
4961     case SHT_SCALAR:
4962         sv = newSV(0);
4963         break;
4964     case SHT_ARRAY:
4965         sv = (SV *) newAV();
4966         break;
4967     case SHT_HASH:
4968         sv = (SV *) newHV();
4969         break;
4970     case SHT_EXTRA:
4971         /*
4972          * Read <extra> flag to know the type of the object.
4973          * Record associated magic type for later.
4974          */
4975         GETMARK(extra_type);
4976         switch (extra_type) {
4977         case SHT_TSCALAR:
4978             sv = newSV(0);
4979             mtype = 'q';
4980             break;
4981         case SHT_TARRAY:
4982             sv = (SV *) newAV();
4983             mtype = 'P';
4984             break;
4985         case SHT_THASH:
4986             sv = (SV *) newHV();
4987             mtype = 'P';
4988             break;
4989         default:
4990             return retrieve_other(aTHX_ cxt, 0);/* Let it croak */
4991         }
4992         break;
4993     default:
4994         return retrieve_other(aTHX_ cxt, 0);	/* Let it croak */
4995     }
4996     SEEN0_NN(sv, 0);				/* Don't bless yet */
4997 
4998     /*
4999      * Whilst flags tell us to recurse, do so.
5000      *
5001      * We don't need to remember the addresses returned by retrieval, because
5002      * all the references will be obtained through indirection via the object
5003      * tags in the object-ID list.
5004      *
5005      * We need to decrement the reference count for these objects
5006      * because, if the user doesn't save a reference to them in the hook,
5007      * they must be freed when this context is cleaned.
5008      */
5009 
5010     while (flags & SHF_NEED_RECURSE) {
5011         TRACEME(("retrieve_hook recursing..."));
5012         rv = retrieve(aTHX_ cxt, 0);
5013         if (!rv)
5014             return (SV *) 0;
5015         SvREFCNT_dec(rv);
5016         TRACEME(("retrieve_hook back with rv=0x%" UVxf,
5017                  PTR2UV(rv)));
5018         GETMARK(flags);
5019     }
5020 
5021     if (flags & SHF_IDX_CLASSNAME) {
5022         SV **sva;
5023         I32 idx;
5024 
5025         /*
5026          * Fetch index from 'aclass'
5027          */
5028 
5029         if (flags & SHF_LARGE_CLASSLEN)
5030             RLEN(idx);
5031         else
5032             GETMARK(idx);
5033 
5034         sva = av_fetch(cxt->aclass, idx, FALSE);
5035         if (!sva)
5036             CROAK(("Class name #%" IVdf " should have been seen already",
5037                    (IV) idx));
5038 
5039         classname = SvPVX(*sva);	/* We know it's a PV, by construction */
5040         TRACEME(("class ID %d => %s", (int)idx, classname));
5041 
5042     } else {
5043         /*
5044          * Decode class name length and read that name.
5045          *
5046          * NOTA BENE: even if the length is stored on one byte, we don't read
5047          * on the stack.  Just like retrieve_blessed(), we limit the name to
5048          * LG_BLESS bytes.  This is an arbitrary decision.
5049          */
5050         char *malloced_classname = NULL;
5051 
5052         if (flags & SHF_LARGE_CLASSLEN)
5053             RLEN(len);
5054         else
5055             GETMARK(len);
5056 
5057         TRACEME(("** allocating %ld bytes for class name", (long)len+1));
5058         if (len > I32_MAX) /* security */
5059             CROAK(("Corrupted classname length %lu", (long)len));
5060         else if (len > LG_BLESS) { /* security: signed len */
5061             PL_nomemok = TRUE;     /* handle error by ourselves */
5062             New(10003, classname, len+1, char);
5063             PL_nomemok = FALSE;
5064             if (!classname)
5065                 CROAK(("Out of memory with len %u", (unsigned)len+1));
5066             malloced_classname = classname;
5067         }
5068 
5069         SAFEPVREAD(classname, (I32)len, malloced_classname);
5070         classname[len] = '\0';		/* Mark string end */
5071 
5072         /*
5073          * Record new classname.
5074          */
5075 
5076         if (!av_store(cxt->aclass, cxt->classnum++,
5077                       newSVpvn(classname, len))) {
5078             Safefree(malloced_classname);
5079             return (SV *) 0;
5080         }
5081     }
5082 
5083     TRACEME(("class name: %s", classname));
5084 
5085     /*
5086      * Decode user-frozen string length and read it in an SV.
5087      *
5088      * For efficiency reasons, we read data directly into the SV buffer.
5089      * To understand that code, read retrieve_scalar()
5090      */
5091 
5092 #ifdef HAS_U64
5093     if (large) {
5094         READ_U64(len2);
5095     }
5096     else
5097 #endif
5098     if (flags & SHF_LARGE_STRLEN) {
5099         U32 len32;
5100         RLEN(len32);
5101         len2 = len32;
5102     }
5103     else
5104         GETMARK(len2);
5105 
5106     frozen = NEWSV(10002, len2 ? len2 : 1);
5107     if (len2) {
5108         SAFEREAD(SvPVX(frozen), len2, frozen);
5109     }
5110     SvCUR_set(frozen, len2);
5111     *SvEND(frozen) = '\0';
5112     (void) SvPOK_only(frozen);		/* Validates string pointer */
5113     if (cxt->s_tainted)			/* Is input source tainted? */
5114         SvTAINT(frozen);
5115 
5116     TRACEME(("frozen string: %d bytes", (int)len2));
5117 
5118     /*
5119      * Decode object-ID list length, if present.
5120      */
5121 
5122     if (flags & SHF_HAS_LIST) {
5123         if (flags & SHF_LARGE_LISTLEN) {
5124             RLEN(len3);
5125 	    if (len3 < 0) {
5126 #ifdef HAS_U64
5127 	        ++has_large_oids;
5128 		len3 = -len3;
5129 #else
5130 		CROAK(("Large object ids in hook data not supported on 32-bit platforms"));
5131 #endif
5132 
5133 	    }
5134 	}
5135 	else
5136             GETMARK(len3);
5137         if (len3) {
5138             av = newAV();
5139             av_extend(av, len3 + 1);	/* Leave room for [0] */
5140             AvFILLp(av) = len3;		/* About to be filled anyway */
5141         }
5142     }
5143 
5144     TRACEME(("has %d object IDs to link", (int)len3));
5145 
5146     /*
5147      * Read object-ID list into array.
5148      * Because we pre-extended it, we can cheat and fill it manually.
5149      *
5150      * We read object tags and we can convert them into SV* on the fly
5151      * because we know all the references listed in there (as tags)
5152      * have been already serialized, hence we have a valid correspondence
5153      * between each of those tags and the recreated SV.
5154      */
5155 
5156     if (av) {
5157         SV **ary = AvARRAY(av);
5158         int i;
5159         for (i = 1; i <= len3; i++) {	/* We leave [0] alone */
5160             ntag_t tag;
5161             SV **svh;
5162             SV *xsv;
5163 
5164 #ifdef HAS_U64
5165 	    if (has_large_oids) {
5166 		READ_U64(tag);
5167 	    }
5168 	    else {
5169 		U32 tmp;
5170 		READ_I32(tmp);
5171 		tag = ntohl(tmp);
5172 	    }
5173 #else
5174 	    READ_I32(tag);
5175 	    tag = ntohl(tag);
5176 #endif
5177 
5178             svh = av_fetch(cxt->aseen, tag, FALSE);
5179             if (!svh) {
5180                 if (tag == cxt->where_is_undef) {
5181                     /* av_fetch uses PL_sv_undef internally, hence this
5182                        somewhat gruesome hack. */
5183                     xsv = &PL_sv_undef;
5184                     svh = &xsv;
5185                 } else {
5186                     CROAK(("Object #%" IVdf
5187                            " should have been retrieved already",
5188                            (IV) tag));
5189                 }
5190             }
5191             xsv = *svh;
5192             ary[i] = SvREFCNT_inc(xsv);
5193         }
5194     }
5195 
5196     /*
5197      * Look up the STORABLE_attach hook
5198      * If blessing is disabled, just return what we've got.
5199      */
5200     if (!(cxt->flags & FLAG_BLESS_OK)) {
5201         TRACEME(("skipping bless because flags is %d", cxt->flags));
5202         return sv;
5203     }
5204 
5205     /*
5206      * Bless the object and look up the STORABLE_thaw hook.
5207      */
5208     stash = gv_stashpv(classname, GV_ADD);
5209 
5210     /* Handle attach case; again can't use pkg_can because it only
5211      * caches one method */
5212     attach = gv_fetchmethod_autoload(stash, "STORABLE_attach", FALSE);
5213     if (attach && isGV(attach)) {
5214         SV* attached;
5215         SV* attach_hook = newRV_inc((SV*) GvCV(attach));
5216 
5217         if (av)
5218             CROAK(("STORABLE_attach called with unexpected references"));
5219         av = newAV();
5220         av_extend(av, 1);
5221         AvFILLp(av) = 0;
5222         AvARRAY(av)[0] = SvREFCNT_inc(frozen);
5223         rv = newSVpv(classname, 0);
5224         attached = scalar_call(aTHX_ rv, attach_hook, clone, av, G_SCALAR);
5225         /* Free memory after a call */
5226         SvREFCNT_dec(rv);
5227         SvREFCNT_dec(frozen);
5228         av_undef(av);
5229         sv_free((SV *) av);
5230         SvREFCNT_dec(attach_hook);
5231         if (attached &&
5232             SvROK(attached) &&
5233             sv_derived_from(attached, classname)
5234             ) {
5235             UNSEE();
5236             /* refcnt of unneeded sv is 2 at this point
5237                (one from newHV, second from SEEN call) */
5238             SvREFCNT_dec(sv);
5239             SvREFCNT_dec(sv);
5240             /* we need to free RV but preserve value that RV point to */
5241             sv = SvRV(attached);
5242             SEEN0_NN(sv, 0);
5243             SvRV_set(attached, NULL);
5244             SvREFCNT_dec(attached);
5245             if (!(flags & SHF_IDX_CLASSNAME) && classname != buf)
5246                 Safefree(classname);
5247             return sv;
5248         }
5249         CROAK(("STORABLE_attach did not return a %s object", classname));
5250     }
5251 
5252     /*
5253      * Bless the object and look up the STORABLE_thaw hook.
5254      */
5255 
5256     BLESS(sv, stash);
5257 
5258     hook = pkg_can(aTHX_ cxt->hook, stash, "STORABLE_thaw");
5259     if (!hook) {
5260         /*
5261          * Hook not found.  Maybe they did not require the module where this
5262          * hook is defined yet?
5263          *
5264          * If the load below succeeds, we'll be able to find the hook.
5265          * Still, it only works reliably when each class is defined in a
5266          * file of its own.
5267          */
5268 
5269         TRACEME(("No STORABLE_thaw defined for objects of class %s", classname));
5270         TRACEME(("Going to load module '%s'", classname));
5271         load_module(PERL_LOADMOD_NOIMPORT, newSVpv(classname, 0), Nullsv);
5272 
5273         /*
5274          * We cache results of pkg_can, so we need to uncache before attempting
5275          * the lookup again.
5276          */
5277 
5278         pkg_uncache(aTHX_ cxt->hook, SvSTASH(sv), "STORABLE_thaw");
5279         hook = pkg_can(aTHX_ cxt->hook, SvSTASH(sv), "STORABLE_thaw");
5280 
5281         if (!hook)
5282             CROAK(("No STORABLE_thaw defined for objects of class %s "
5283                    "(even after a \"require %s;\")", classname, classname));
5284     }
5285 
5286     /*
5287      * If we don't have an 'av' yet, prepare one.
5288      * Then insert the frozen string as item [0].
5289      */
5290 
5291     if (!av) {
5292         av = newAV();
5293         av_extend(av, 1);
5294         AvFILLp(av) = 0;
5295     }
5296     AvARRAY(av)[0] = SvREFCNT_inc(frozen);
5297 
5298     /*
5299      * Call the hook as:
5300      *
5301      *   $object->STORABLE_thaw($cloning, $frozen, @refs);
5302      *
5303      * where $object is our blessed (empty) object, $cloning is a boolean
5304      * telling whether we're running a deep clone, $frozen is the frozen
5305      * string the user gave us in his serializing hook, and @refs, which may
5306      * be empty, is the list of extra references he returned along for us
5307      * to serialize.
5308      *
5309      * In effect, the hook is an alternate creation routine for the class,
5310      * the object itself being already created by the runtime.
5311      */
5312 
5313     TRACEME(("calling STORABLE_thaw on %s at 0x%" UVxf " (%" IVdf " args)",
5314              classname, PTR2UV(sv), (IV) AvFILLp(av) + 1));
5315 
5316     rv = newRV_inc(sv);
5317     (void) scalar_call(aTHX_ rv, hook, clone, av, G_SCALAR|G_DISCARD);
5318     SvREFCNT_dec(rv);
5319 
5320     /*
5321      * Final cleanup.
5322      */
5323 
5324     SvREFCNT_dec(frozen);
5325     av_undef(av);
5326     sv_free((SV *) av);
5327     if (!(flags & SHF_IDX_CLASSNAME) && classname != buf)
5328         Safefree(classname);
5329 
5330     /*
5331      * If we had an <extra> type, then the object was not as simple, and
5332      * we need to restore extra magic now.
5333      */
5334 
5335     if (!extra_type)
5336         return sv;
5337 
5338     TRACEME(("retrieving magic object for 0x%" UVxf "...", PTR2UV(sv)));
5339 
5340     rv = retrieve(aTHX_ cxt, 0);	/* Retrieve <magic object> */
5341 
5342     TRACEME(("restoring the magic object 0x%" UVxf " part of 0x%" UVxf,
5343              PTR2UV(rv), PTR2UV(sv)));
5344 
5345     switch (extra_type) {
5346     case SHT_TSCALAR:
5347         sv_upgrade(sv, SVt_PVMG);
5348         break;
5349     case SHT_TARRAY:
5350         sv_upgrade(sv, SVt_PVAV);
5351         AvREAL_off((AV *)sv);
5352         break;
5353     case SHT_THASH:
5354         sv_upgrade(sv, SVt_PVHV);
5355         break;
5356     default:
5357         CROAK(("Forgot to deal with extra type %d", extra_type));
5358         break;
5359     }
5360 
5361     /*
5362      * Adding the magic only now, well after the STORABLE_thaw hook was called
5363      * means the hook cannot know it deals with an object whose variable is
5364      * tied.  But this is happening when retrieving $o in the following case:
5365      *
5366      *	my %h;
5367      *  tie %h, 'FOO';
5368      *	my $o = bless \%h, 'BAR';
5369      *
5370      * The 'BAR' class is NOT the one where %h is tied into.  Therefore, as
5371      * far as the 'BAR' class is concerned, the fact that %h is not a REAL
5372      * hash but a tied one should not matter at all, and remain transparent.
5373      * This means the magic must be restored by Storable AFTER the hook is
5374      * called.
5375      *
5376      * That looks very reasonable to me, but then I've come up with this
5377      * after a bug report from David Nesting, who was trying to store such
5378      * an object and caused Storable to fail.  And unfortunately, it was
5379      * also the easiest way to retrofit support for blessed ref to tied objects
5380      * into the existing design.  -- RAM, 17/02/2001
5381      */
5382 
5383     sv_magic(sv, rv, mtype, (char *)NULL, 0);
5384     SvREFCNT_dec(rv);			/* Undo refcnt inc from sv_magic() */
5385 
5386     return sv;
5387 }
5388 
retrieve_hook(pTHX_ stcxt_t * cxt,const char * cname)5389 static SV *retrieve_hook(pTHX_ stcxt_t *cxt, const char *cname) {
5390     return retrieve_hook_common(aTHX_ cxt, cname, FALSE);
5391 }
5392 
5393 /*
5394  * retrieve_ref
5395  *
5396  * Retrieve reference to some other scalar.
5397  * Layout is SX_REF <object>, with SX_REF already read.
5398  */
retrieve_ref(pTHX_ stcxt_t * cxt,const char * cname)5399 static SV *retrieve_ref(pTHX_ stcxt_t *cxt, const char *cname)
5400 {
5401     SV *rv;
5402     SV *sv;
5403     HV *stash;
5404 
5405     TRACEME(("retrieve_ref (#%d)", (int)cxt->tagnum));
5406 
5407     /*
5408      * We need to create the SV that holds the reference to the yet-to-retrieve
5409      * object now, so that we may record the address in the seen table.
5410      * Otherwise, if the object to retrieve references us, we won't be able
5411      * to resolve the SX_OBJECT we'll see at that point! Hence we cannot
5412      * do the retrieve first and use rv = newRV(sv) since it will be too late
5413      * for SEEN() recording.
5414      */
5415 
5416     rv = NEWSV(10002, 0);
5417     if (cname)
5418         stash = gv_stashpv(cname, GV_ADD);
5419     else
5420         stash = 0;
5421     SEEN_NN(rv, stash, 0);	/* Will return if rv is null */
5422     sv = retrieve(aTHX_ cxt, 0);/* Retrieve <object> */
5423     if (!sv)
5424         return (SV *) 0;	/* Failed */
5425 
5426     /*
5427      * WARNING: breaks RV encapsulation.
5428      *
5429      * Now for the tricky part. We have to upgrade our existing SV, so that
5430      * it is now an RV on sv... Again, we cheat by duplicating the code
5431      * held in newSVrv(), since we already got our SV from retrieve().
5432      *
5433      * We don't say:
5434      *
5435      *		SvRV(rv) = SvREFCNT_inc(sv);
5436      *
5437      * here because the reference count we got from retrieve() above is
5438      * already correct: if the object was retrieved from the file, then
5439      * its reference count is one. Otherwise, if it was retrieved via
5440      * an SX_OBJECT indication, a ref count increment was done.
5441      */
5442 
5443     if (cname) {
5444         /* No need to do anything, as rv will already be PVMG.  */
5445         assert (SvTYPE(rv) == SVt_RV || SvTYPE(rv) >= SVt_PV);
5446     } else {
5447         sv_upgrade(rv, SVt_RV);
5448     }
5449 
5450     SvRV_set(rv, sv);		/* $rv = \$sv */
5451     SvROK_on(rv);
5452     /*if (cxt->entry && ++cxt->ref_cnt > MAX_REF_CNT) {
5453         CROAK(("Max. recursion depth with nested refs exceeded"));
5454     }*/
5455 
5456     TRACEME(("ok (retrieve_ref at 0x%" UVxf ")", PTR2UV(rv)));
5457 
5458     return rv;
5459 }
5460 
5461 /*
5462  * retrieve_weakref
5463  *
5464  * Retrieve weak reference to some other scalar.
5465  * Layout is SX_WEAKREF <object>, with SX_WEAKREF already read.
5466  */
retrieve_weakref(pTHX_ stcxt_t * cxt,const char * cname)5467 static SV *retrieve_weakref(pTHX_ stcxt_t *cxt, const char *cname)
5468 {
5469     SV *sv;
5470 
5471     TRACEME(("retrieve_weakref (#%d)", (int)cxt->tagnum));
5472 
5473     sv = retrieve_ref(aTHX_ cxt, cname);
5474     if (sv) {
5475 #ifdef SvWEAKREF
5476         sv_rvweaken(sv);
5477 #else
5478         WEAKREF_CROAK();
5479 #endif
5480     }
5481     return sv;
5482 }
5483 
5484 /*
5485  * retrieve_overloaded
5486  *
5487  * Retrieve reference to some other scalar with overloading.
5488  * Layout is SX_OVERLOAD <object>, with SX_OVERLOAD already read.
5489  */
retrieve_overloaded(pTHX_ stcxt_t * cxt,const char * cname)5490 static SV *retrieve_overloaded(pTHX_ stcxt_t *cxt, const char *cname)
5491 {
5492     SV *rv;
5493     SV *sv;
5494     HV *stash;
5495 
5496     TRACEME(("retrieve_overloaded (#%d)", (int)cxt->tagnum));
5497 
5498     /*
5499      * Same code as retrieve_ref(), duplicated to avoid extra call.
5500      */
5501 
5502     rv = NEWSV(10002, 0);
5503     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5504     SEEN_NN(rv, stash, 0);	     /* Will return if rv is null */
5505     cxt->in_retrieve_overloaded = 1; /* so sv_bless doesn't call S_reset_amagic */
5506     sv = retrieve(aTHX_ cxt, 0);     /* Retrieve <object> */
5507     cxt->in_retrieve_overloaded = 0;
5508     if (!sv)
5509         return (SV *) 0;	/* Failed */
5510 
5511     /*
5512      * WARNING: breaks RV encapsulation.
5513      */
5514 
5515     SvUPGRADE(rv, SVt_RV);
5516     SvRV_set(rv, sv);		/* $rv = \$sv */
5517     SvROK_on(rv);
5518 
5519     /*
5520      * Restore overloading magic.
5521      */
5522 
5523     stash = SvTYPE(sv) ? (HV *) SvSTASH (sv) : 0;
5524     if (!stash) {
5525         CROAK(("Cannot restore overloading on %s(0x%" UVxf
5526                ") (package <unknown>)",
5527                sv_reftype(sv, FALSE),
5528                PTR2UV(sv)));
5529     }
5530     if (!Gv_AMG(stash)) {
5531         const char *package = HvNAME_get(stash);
5532         TRACEME(("No overloading defined for package %s", package));
5533         TRACEME(("Going to load module '%s'", package));
5534         load_module(PERL_LOADMOD_NOIMPORT, newSVpv(package, 0), Nullsv);
5535         if (!Gv_AMG(stash)) {
5536             CROAK(("Cannot restore overloading on %s(0x%" UVxf
5537                    ") (package %s) (even after a \"require %s;\")",
5538                    sv_reftype(sv, FALSE),
5539                    PTR2UV(sv),
5540                    package, package));
5541         }
5542     }
5543 
5544     SvAMAGIC_on(rv);
5545 
5546     TRACEME(("ok (retrieve_overloaded at 0x%" UVxf ")", PTR2UV(rv)));
5547 
5548     return rv;
5549 }
5550 
5551 /*
5552  * retrieve_weakoverloaded
5553  *
5554  * Retrieve weak overloaded reference to some other scalar.
5555  * Layout is SX_WEAKOVERLOADED <object>, with SX_WEAKOVERLOADED already read.
5556  */
retrieve_weakoverloaded(pTHX_ stcxt_t * cxt,const char * cname)5557 static SV *retrieve_weakoverloaded(pTHX_ stcxt_t *cxt, const char *cname)
5558 {
5559     SV *sv;
5560 
5561     TRACEME(("retrieve_weakoverloaded (#%d)", (int)cxt->tagnum));
5562 
5563     sv = retrieve_overloaded(aTHX_ cxt, cname);
5564     if (sv) {
5565 #ifdef SvWEAKREF
5566         sv_rvweaken(sv);
5567 #else
5568         WEAKREF_CROAK();
5569 #endif
5570     }
5571     return sv;
5572 }
5573 
5574 /*
5575  * retrieve_tied_array
5576  *
5577  * Retrieve tied array
5578  * Layout is SX_TIED_ARRAY <object>, with SX_TIED_ARRAY already read.
5579  */
retrieve_tied_array(pTHX_ stcxt_t * cxt,const char * cname)5580 static SV *retrieve_tied_array(pTHX_ stcxt_t *cxt, const char *cname)
5581 {
5582     SV *tv;
5583     SV *sv;
5584     HV *stash;
5585 
5586     TRACEME(("retrieve_tied_array (#%d)", (int)cxt->tagnum));
5587 
5588     if (!(cxt->flags & FLAG_TIE_OK)) {
5589         CROAK(("Tying is disabled."));
5590     }
5591 
5592     tv = NEWSV(10002, 0);
5593     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5594     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5595     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5596     if (!sv)
5597         return (SV *) 0;		/* Failed */
5598 
5599     sv_upgrade(tv, SVt_PVAV);
5600     sv_magic(tv, sv, 'P', (char *)NULL, 0);
5601     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5602 
5603     TRACEME(("ok (retrieve_tied_array at 0x%" UVxf ")", PTR2UV(tv)));
5604 
5605     return tv;
5606 }
5607 
5608 /*
5609  * retrieve_tied_hash
5610  *
5611  * Retrieve tied hash
5612  * Layout is SX_TIED_HASH <object>, with SX_TIED_HASH already read.
5613  */
retrieve_tied_hash(pTHX_ stcxt_t * cxt,const char * cname)5614 static SV *retrieve_tied_hash(pTHX_ stcxt_t *cxt, const char *cname)
5615 {
5616     SV *tv;
5617     SV *sv;
5618     HV *stash;
5619 
5620     TRACEME(("retrieve_tied_hash (#%d)", (int)cxt->tagnum));
5621 
5622     if (!(cxt->flags & FLAG_TIE_OK)) {
5623         CROAK(("Tying is disabled."));
5624     }
5625 
5626     tv = NEWSV(10002, 0);
5627     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5628     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5629     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5630     if (!sv)
5631         return (SV *) 0;		/* Failed */
5632 
5633     sv_upgrade(tv, SVt_PVHV);
5634     sv_magic(tv, sv, 'P', (char *)NULL, 0);
5635     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5636 
5637     TRACEME(("ok (retrieve_tied_hash at 0x%" UVxf ")", PTR2UV(tv)));
5638 
5639     return tv;
5640 }
5641 
5642 /*
5643  * retrieve_tied_scalar
5644  *
5645  * Retrieve tied scalar
5646  * Layout is SX_TIED_SCALAR <object>, with SX_TIED_SCALAR already read.
5647  */
retrieve_tied_scalar(pTHX_ stcxt_t * cxt,const char * cname)5648 static SV *retrieve_tied_scalar(pTHX_ stcxt_t *cxt, const char *cname)
5649 {
5650     SV *tv;
5651     SV *sv, *obj = NULL;
5652     HV *stash;
5653 
5654     TRACEME(("retrieve_tied_scalar (#%d)", (int)cxt->tagnum));
5655 
5656     if (!(cxt->flags & FLAG_TIE_OK)) {
5657         CROAK(("Tying is disabled."));
5658     }
5659 
5660     tv = NEWSV(10002, 0);
5661     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5662     SEEN_NN(tv, stash, 0);		/* Will return if rv is null */
5663     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5664     if (!sv) {
5665         return (SV *) 0;		/* Failed */
5666     }
5667     else if (SvTYPE(sv) != SVt_NULL) {
5668         obj = sv;
5669     }
5670 
5671     sv_upgrade(tv, SVt_PVMG);
5672     sv_magic(tv, obj, 'q', (char *)NULL, 0);
5673 
5674     if (obj) {
5675         /* Undo refcnt inc from sv_magic() */
5676         SvREFCNT_dec(obj);
5677     }
5678 
5679     TRACEME(("ok (retrieve_tied_scalar at 0x%" UVxf ")", PTR2UV(tv)));
5680 
5681     return tv;
5682 }
5683 
5684 /*
5685  * retrieve_tied_key
5686  *
5687  * Retrieve reference to value in a tied hash.
5688  * Layout is SX_TIED_KEY <object> <key>, with SX_TIED_KEY already read.
5689  */
retrieve_tied_key(pTHX_ stcxt_t * cxt,const char * cname)5690 static SV *retrieve_tied_key(pTHX_ stcxt_t *cxt, const char *cname)
5691 {
5692     SV *tv;
5693     SV *sv;
5694     SV *key;
5695     HV *stash;
5696 
5697     TRACEME(("retrieve_tied_key (#%d)", (int)cxt->tagnum));
5698 
5699     if (!(cxt->flags & FLAG_TIE_OK)) {
5700         CROAK(("Tying is disabled."));
5701     }
5702 
5703     tv = NEWSV(10002, 0);
5704     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5705     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5706     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5707     if (!sv)
5708         return (SV *) 0;		/* Failed */
5709 
5710     key = retrieve(aTHX_ cxt, 0);	/* Retrieve <key> */
5711     if (!key)
5712         return (SV *) 0;		/* Failed */
5713 
5714     sv_upgrade(tv, SVt_PVMG);
5715     sv_magic(tv, sv, 'p', (char *)key, HEf_SVKEY);
5716     SvREFCNT_dec(key);			/* Undo refcnt inc from sv_magic() */
5717     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5718 
5719     return tv;
5720 }
5721 
5722 /*
5723  * retrieve_tied_idx
5724  *
5725  * Retrieve reference to value in a tied array.
5726  * Layout is SX_TIED_IDX <object> <idx>, with SX_TIED_IDX already read.
5727  */
retrieve_tied_idx(pTHX_ stcxt_t * cxt,const char * cname)5728 static SV *retrieve_tied_idx(pTHX_ stcxt_t *cxt, const char *cname)
5729 {
5730     SV *tv;
5731     SV *sv;
5732     HV *stash;
5733     I32 idx;
5734 
5735     TRACEME(("retrieve_tied_idx (#%d)", (int)cxt->tagnum));
5736 
5737     if (!(cxt->flags & FLAG_TIE_OK)) {
5738         CROAK(("Tying is disabled."));
5739     }
5740 
5741     tv = NEWSV(10002, 0);
5742     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5743     SEEN_NN(tv, stash, 0);		/* Will return if tv is null */
5744     sv = retrieve(aTHX_ cxt, 0);	/* Retrieve <object> */
5745     if (!sv)
5746         return (SV *) 0;		/* Failed */
5747 
5748     RLEN(idx);				/* Retrieve <idx> */
5749 
5750     sv_upgrade(tv, SVt_PVMG);
5751     sv_magic(tv, sv, 'p', (char *)NULL, idx);
5752     SvREFCNT_dec(sv);			/* Undo refcnt inc from sv_magic() */
5753 
5754     return tv;
5755 }
5756 
5757 /*
5758  * get_lstring
5759  *
5760  * Helper to read a string
5761  */
get_lstring(pTHX_ stcxt_t * cxt,UV len,int isutf8,const char * cname)5762 static SV *get_lstring(pTHX_ stcxt_t *cxt, UV len, int isutf8, const char *cname)
5763 {
5764     SV *sv;
5765     HV *stash;
5766 
5767     TRACEME(("get_lstring (#%d), len = %" UVuf, (int)cxt->tagnum, len));
5768 
5769     /*
5770      * Allocate an empty scalar of the suitable length.
5771      */
5772 
5773     sv = NEWSV(10002, len);
5774     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5775     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
5776 
5777     if (len ==  0) {
5778         SvPVCLEAR(sv);
5779         return sv;
5780     }
5781 
5782     /*
5783      * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
5784      *
5785      * Now, for efficiency reasons, read data directly inside the SV buffer,
5786      * and perform the SV final settings directly by duplicating the final
5787      * work done by sv_setpv. Since we're going to allocate lots of scalars
5788      * this way, it's worth the hassle and risk.
5789      */
5790 
5791     SAFEREAD(SvPVX(sv), len, sv);
5792     SvCUR_set(sv, len);			/* Record C string length */
5793     *SvEND(sv) = '\0';			/* Ensure it's null terminated anyway */
5794     (void) SvPOK_only(sv);		/* Validate string pointer */
5795     if (cxt->s_tainted)			/* Is input source tainted? */
5796         SvTAINT(sv);			/* External data cannot be trusted */
5797 
5798     /* Check for CVE-215-1592 */
5799     if (cname && len == 13 && strEQc(cname, "CGITempFile")
5800         && strEQc(SvPVX(sv), "mt-config.cgi")) {
5801 #if defined(USE_CPERL) && defined(WARN_SECURITY)
5802         Perl_warn_security(aTHX_
5803             "Movable-Type CVE-2015-1592 Storable metasploit attack");
5804 #else
5805         Perl_warn(aTHX_
5806             "SECURITY: Movable-Type CVE-2015-1592 Storable metasploit attack");
5807 #endif
5808     }
5809 
5810     if (isutf8) {
5811         TRACEME(("large utf8 string len %" UVuf " '%s'", len,
5812                  len >= 2048 ? "<string too long>" : SvPVX(sv)));
5813 #ifdef HAS_UTF8_SCALARS
5814         SvUTF8_on(sv);
5815 #else
5816         if (cxt->use_bytes < 0)
5817             cxt->use_bytes
5818                 = (SvTRUE(get_sv("Storable::drop_utf8", GV_ADD))
5819                    ? 1 : 0);
5820         if (cxt->use_bytes == 0)
5821             UTF8_CROAK();
5822 #endif
5823     } else {
5824         TRACEME(("large string len %" UVuf " '%s'", len,
5825                  len >= 2048 ? "<string too long>" : SvPVX(sv)));
5826     }
5827     TRACEME(("ok (get_lstring at 0x%" UVxf ")", PTR2UV(sv)));
5828 
5829     return sv;
5830 }
5831 
5832 /*
5833  * retrieve_lscalar
5834  *
5835  * Retrieve defined long (string) scalar.
5836  *
5837  * Layout is SX_LSCALAR <length> <data>, with SX_LSCALAR already read.
5838  * The scalar is "long" in that <length> is larger than LG_SCALAR so it
5839  * was not stored on a single byte, but in 4 bytes. For strings longer than
5840  * 4 byte (>2GB) see retrieve_lobject.
5841  */
retrieve_lscalar(pTHX_ stcxt_t * cxt,const char * cname)5842 static SV *retrieve_lscalar(pTHX_ stcxt_t *cxt, const char *cname)
5843 {
5844     U32 len;
5845     RLEN(len);
5846     return get_lstring(aTHX_ cxt, len, 0, cname);
5847 }
5848 
5849 /*
5850  * retrieve_scalar
5851  *
5852  * Retrieve defined short (string) scalar.
5853  *
5854  * Layout is SX_SCALAR <length> <data>, with SX_SCALAR already read.
5855  * The scalar is "short" so <length> is single byte. If it is 0, there
5856  * is no <data> section.
5857  */
retrieve_scalar(pTHX_ stcxt_t * cxt,const char * cname)5858 static SV *retrieve_scalar(pTHX_ stcxt_t *cxt, const char *cname)
5859 {
5860     int len;
5861     /*SV *sv;
5862       HV *stash;*/
5863 
5864     GETMARK(len);
5865     TRACEME(("retrieve_scalar (#%d), len = %d", (int)cxt->tagnum, len));
5866     return get_lstring(aTHX_ cxt, (UV)len, 0, cname);
5867 }
5868 
5869 /*
5870  * retrieve_utf8str
5871  *
5872  * Like retrieve_scalar(), but tag result as utf8.
5873  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
5874  */
retrieve_utf8str(pTHX_ stcxt_t * cxt,const char * cname)5875 static SV *retrieve_utf8str(pTHX_ stcxt_t *cxt, const char *cname)
5876 {
5877     int len;
5878     /*SV *sv;*/
5879 
5880     TRACEME(("retrieve_utf8str"));
5881     GETMARK(len);
5882     return get_lstring(aTHX_ cxt, (UV)len, 1, cname);
5883 }
5884 
5885 /*
5886  * retrieve_lutf8str
5887  *
5888  * Like retrieve_lscalar(), but tag result as utf8.
5889  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
5890  */
retrieve_lutf8str(pTHX_ stcxt_t * cxt,const char * cname)5891 static SV *retrieve_lutf8str(pTHX_ stcxt_t *cxt, const char *cname)
5892 {
5893     U32 len;
5894 
5895     TRACEME(("retrieve_lutf8str"));
5896 
5897     RLEN(len);
5898     return get_lstring(aTHX_ cxt, (UV)len, 1, cname);
5899 }
5900 
5901 /*
5902  * retrieve_vstring
5903  *
5904  * Retrieve a vstring, and then retrieve the stringy scalar following it,
5905  * attaching the vstring to the scalar via magic.
5906  * If we're retrieving a vstring in a perl without vstring magic, croaks.
5907  *
5908  * The vstring layout mirrors an SX_SCALAR string:
5909  * SX_VSTRING <length> <data> with SX_VSTRING already read.
5910  */
retrieve_vstring(pTHX_ stcxt_t * cxt,const char * cname)5911 static SV *retrieve_vstring(pTHX_ stcxt_t *cxt, const char *cname)
5912 {
5913 #ifdef SvVOK
5914     char s[256];
5915     int len;
5916     SV *sv;
5917 
5918     GETMARK(len);
5919     TRACEME(("retrieve_vstring (#%d), len = %d", (int)cxt->tagnum, len));
5920 
5921     READ(s, len);
5922     sv = retrieve(aTHX_ cxt, cname);
5923     if (!sv)
5924         return (SV *) 0;		/* Failed */
5925     sv_magic(sv,NULL,PERL_MAGIC_vstring,s,len);
5926     /* 5.10.0 and earlier seem to need this */
5927     SvRMAGICAL_on(sv);
5928 
5929     TRACEME(("ok (retrieve_vstring at 0x%" UVxf ")", PTR2UV(sv)));
5930     return sv;
5931 #else
5932     VSTRING_CROAK();
5933     return Nullsv;
5934 #endif
5935 }
5936 
5937 /*
5938  * retrieve_lvstring
5939  *
5940  * Like retrieve_vstring, but for longer vstrings.
5941  */
retrieve_lvstring(pTHX_ stcxt_t * cxt,const char * cname)5942 static SV *retrieve_lvstring(pTHX_ stcxt_t *cxt, const char *cname)
5943 {
5944 #ifdef SvVOK
5945     char *s;
5946     U32 len;
5947     SV *sv;
5948 
5949     RLEN(len);
5950     TRACEME(("retrieve_lvstring (#%d), len = %" UVuf,
5951              (int)cxt->tagnum, (UV)len));
5952 
5953     /* Since we'll no longer produce such large vstrings, reject them
5954        here too.
5955     */
5956     if (len >= I32_MAX) {
5957         CROAK(("vstring too large to fetch"));
5958     }
5959 
5960     New(10003, s, len+1, char);
5961     SAFEPVREAD(s, (I32)len, s);
5962 
5963     sv = retrieve(aTHX_ cxt, cname);
5964     if (!sv) {
5965         Safefree(s);
5966         return (SV *) 0;		/* Failed */
5967     }
5968     sv_magic(sv,NULL,PERL_MAGIC_vstring,s,len);
5969     /* 5.10.0 and earlier seem to need this */
5970     SvRMAGICAL_on(sv);
5971 
5972     Safefree(s);
5973 
5974     TRACEME(("ok (retrieve_lvstring at 0x%" UVxf ")", PTR2UV(sv)));
5975     return sv;
5976 #else
5977     VSTRING_CROAK();
5978     return Nullsv;
5979 #endif
5980 }
5981 
5982 /*
5983  * retrieve_integer
5984  *
5985  * Retrieve defined integer.
5986  * Layout is SX_INTEGER <data>, whith SX_INTEGER already read.
5987  */
retrieve_integer(pTHX_ stcxt_t * cxt,const char * cname)5988 static SV *retrieve_integer(pTHX_ stcxt_t *cxt, const char *cname)
5989 {
5990     SV *sv;
5991     HV *stash;
5992     IV iv;
5993 
5994     TRACEME(("retrieve_integer (#%d)", (int)cxt->tagnum));
5995 
5996     READ(&iv, sizeof(iv));
5997     sv = newSViv(iv);
5998     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
5999     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
6000 
6001     TRACEME(("integer %" IVdf, iv));
6002     TRACEME(("ok (retrieve_integer at 0x%" UVxf ")", PTR2UV(sv)));
6003 
6004     return sv;
6005 }
6006 
6007 /*
6008  * retrieve_lobject
6009  *
6010  * Retrieve overlong scalar, array or hash.
6011  * Layout is SX_LOBJECT type U64_len ...
6012  */
retrieve_lobject(pTHX_ stcxt_t * cxt,const char * cname)6013 static SV *retrieve_lobject(pTHX_ stcxt_t *cxt, const char *cname)
6014 {
6015     int type;
6016 #ifdef HAS_U64
6017     UV  len;
6018     SV *sv;
6019     int hash_flags = 0;
6020 #endif
6021 
6022     TRACEME(("retrieve_lobject (#%d)", (int)cxt->tagnum));
6023 
6024     GETMARK(type);
6025     TRACEME(("object type %d", type));
6026 #ifdef HAS_U64
6027 
6028     if (type == SX_FLAG_HASH) {
6029 	/* we write the flags immediately after the op.  I could have
6030 	   changed the writer, but this may allow someone to recover
6031 	   data they're already frozen, though such a very large hash
6032 	   seems unlikely.
6033 	*/
6034 	GETMARK(hash_flags);
6035     }
6036     else if (type == SX_HOOK) {
6037         return retrieve_hook_common(aTHX_ cxt, cname, TRUE);
6038     }
6039 
6040     READ_U64(len);
6041     TRACEME(("wlen %" UVuf, len));
6042     switch (type) {
6043     case SX_OBJECT:
6044         {
6045             /* not a large object, just a large index */
6046             SV **svh = av_fetch(cxt->aseen, len, FALSE);
6047             if (!svh)
6048                 CROAK(("Object #%" UVuf " should have been retrieved already",
6049                       len));
6050             sv = *svh;
6051             TRACEME(("had retrieved #%" UVuf " at 0x%" UVxf, len, PTR2UV(sv)));
6052             SvREFCNT_inc(sv);
6053         }
6054         break;
6055     case SX_LSCALAR:
6056         sv = get_lstring(aTHX_ cxt, len, 0, cname);
6057         break;
6058     case SX_LUTF8STR:
6059         sv = get_lstring(aTHX_ cxt, len, 1, cname);
6060         break;
6061     case SX_ARRAY:
6062         sv = get_larray(aTHX_ cxt, len, cname);
6063         break;
6064     /* <5.12 you could store larger hashes, but cannot iterate over them.
6065        So we reject them, it's a bug. */
6066     case SX_FLAG_HASH:
6067         sv = get_lhash(aTHX_ cxt, len, hash_flags, cname);
6068         break;
6069     case SX_HASH:
6070         sv = get_lhash(aTHX_ cxt, len, 0, cname);
6071         break;
6072     default:
6073         CROAK(("Unexpected type %d in retrieve_lobject\n", type));
6074     }
6075 
6076     TRACEME(("ok (retrieve_lobject at 0x%" UVxf ")", PTR2UV(sv)));
6077     return sv;
6078 #else
6079     PERL_UNUSED_ARG(cname);
6080 
6081     /* previously this (brokenly) checked the length value and only failed if
6082        the length was over 4G.
6083        Since this op should only occur with objects over 4GB (or 2GB) we can just
6084        reject it.
6085     */
6086     CROAK(("Invalid large object op for this 32bit system"));
6087 #endif
6088 }
6089 
6090 /*
6091  * retrieve_netint
6092  *
6093  * Retrieve defined integer in network order.
6094  * Layout is SX_NETINT <data>, whith SX_NETINT already read.
6095  */
retrieve_netint(pTHX_ stcxt_t * cxt,const char * cname)6096 static SV *retrieve_netint(pTHX_ stcxt_t *cxt, const char *cname)
6097 {
6098     SV *sv;
6099     HV *stash;
6100     I32 iv;
6101 
6102     TRACEME(("retrieve_netint (#%d)", (int)cxt->tagnum));
6103 
6104     READ_I32(iv);
6105 #ifdef HAS_NTOHL
6106     sv = newSViv((int) ntohl(iv));
6107     TRACEME(("network integer %d", (int) ntohl(iv)));
6108 #else
6109     sv = newSViv(iv);
6110     TRACEME(("network integer (as-is) %d", iv));
6111 #endif
6112     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6113     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
6114 
6115     TRACEME(("ok (retrieve_netint at 0x%" UVxf ")", PTR2UV(sv)));
6116 
6117     return sv;
6118 }
6119 
6120 /*
6121  * retrieve_double
6122  *
6123  * Retrieve defined double.
6124  * Layout is SX_DOUBLE <data>, whith SX_DOUBLE already read.
6125  */
retrieve_double(pTHX_ stcxt_t * cxt,const char * cname)6126 static SV *retrieve_double(pTHX_ stcxt_t *cxt, const char *cname)
6127 {
6128     SV *sv;
6129     HV *stash;
6130     NV nv;
6131 
6132     TRACEME(("retrieve_double (#%d)", (int)cxt->tagnum));
6133 
6134     READ(&nv, sizeof(nv));
6135     sv = newSVnv(nv);
6136     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6137     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
6138 
6139     TRACEME(("double %" NVff, nv));
6140     TRACEME(("ok (retrieve_double at 0x%" UVxf ")", PTR2UV(sv)));
6141 
6142     return sv;
6143 }
6144 
6145 /*
6146  * retrieve_byte
6147  *
6148  * Retrieve defined byte (small integer within the [-128, +127] range).
6149  * Layout is SX_BYTE <data>, whith SX_BYTE already read.
6150  */
retrieve_byte(pTHX_ stcxt_t * cxt,const char * cname)6151 static SV *retrieve_byte(pTHX_ stcxt_t *cxt, const char *cname)
6152 {
6153     SV *sv;
6154     HV *stash;
6155     int siv;
6156 #ifdef _MSC_VER
6157     /* MSVC 2017 doesn't handle the AIX workaround well */
6158     int tmp;
6159 #else
6160     signed char tmp;	/* Workaround for AIX cc bug --H.Merijn Brand */
6161 #endif
6162 
6163     TRACEME(("retrieve_byte (#%d)", (int)cxt->tagnum));
6164 
6165     GETMARK(siv);
6166     TRACEME(("small integer read as %d", (unsigned char) siv));
6167     tmp = (unsigned char) siv - 128;
6168     sv = newSViv(tmp);
6169     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6170     SEEN_NN(sv, stash, 0);	/* Associate this new scalar with tag "tagnum" */
6171 
6172     TRACEME(("byte %d", tmp));
6173     TRACEME(("ok (retrieve_byte at 0x%" UVxf ")", PTR2UV(sv)));
6174 
6175     return sv;
6176 }
6177 
6178 /*
6179  * retrieve_undef
6180  *
6181  * Return the undefined value.
6182  */
retrieve_undef(pTHX_ stcxt_t * cxt,const char * cname)6183 static SV *retrieve_undef(pTHX_ stcxt_t *cxt, const char *cname)
6184 {
6185     SV *sv;
6186     HV *stash;
6187 
6188     TRACEME(("retrieve_undef"));
6189 
6190     sv = newSV(0);
6191     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6192     SEEN_NN(sv, stash, 0);
6193 
6194     return sv;
6195 }
6196 
6197 /*
6198  * retrieve_sv_undef
6199  *
6200  * Return the immortal undefined value.
6201  */
retrieve_sv_undef(pTHX_ stcxt_t * cxt,const char * cname)6202 static SV *retrieve_sv_undef(pTHX_ stcxt_t *cxt, const char *cname)
6203 {
6204     SV *sv = &PL_sv_undef;
6205     HV *stash;
6206 
6207     TRACEME(("retrieve_sv_undef"));
6208 
6209     /* Special case PL_sv_undef, as av_fetch uses it internally to mark
6210        deleted elements, and will return NULL (fetch failed) whenever it
6211        is fetched.  */
6212     if (cxt->where_is_undef == UNSET_NTAG_T) {
6213         cxt->where_is_undef = cxt->tagnum;
6214     }
6215     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6216     SEEN_NN(sv, stash, 1);
6217     return sv;
6218 }
6219 
6220 /*
6221  * retrieve_sv_yes
6222  *
6223  * Return the immortal yes value.
6224  */
retrieve_sv_yes(pTHX_ stcxt_t * cxt,const char * cname)6225 static SV *retrieve_sv_yes(pTHX_ stcxt_t *cxt, const char *cname)
6226 {
6227     SV *sv = &PL_sv_yes;
6228     HV *stash;
6229 
6230     TRACEME(("retrieve_sv_yes"));
6231 
6232     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6233     SEEN_NN(sv, stash, 1);
6234     return sv;
6235 }
6236 
6237 /*
6238  * retrieve_sv_no
6239  *
6240  * Return the immortal no value.
6241  */
retrieve_sv_no(pTHX_ stcxt_t * cxt,const char * cname)6242 static SV *retrieve_sv_no(pTHX_ stcxt_t *cxt, const char *cname)
6243 {
6244     SV *sv = &PL_sv_no;
6245     HV *stash;
6246 
6247     TRACEME(("retrieve_sv_no"));
6248 
6249     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6250     SEEN_NN(sv, stash, 1);
6251     return sv;
6252 }
6253 
6254 /*
6255  * retrieve_svundef_elem
6256  *
6257  * Return &PL_sv_placeholder, representing &PL_sv_undef in an array.  This
6258  * is a bit of a hack, but we already use SX_SV_UNDEF to mean a nonexistent
6259  * element, for historical reasons.
6260  */
retrieve_svundef_elem(pTHX_ stcxt_t * cxt,const char * cname)6261 static SV *retrieve_svundef_elem(pTHX_ stcxt_t *cxt, const char *cname)
6262 {
6263     TRACEME(("retrieve_svundef_elem"));
6264 
6265     /* SEEN reads the contents of its SV argument, which we are not
6266        supposed to do with &PL_sv_placeholder. */
6267     SEEN_NN(&PL_sv_undef, cname, 1);
6268 
6269     return &PL_sv_placeholder;
6270 }
6271 
6272 /*
6273  * retrieve_array
6274  *
6275  * Retrieve a whole array.
6276  * Layout is SX_ARRAY <size> followed by each item, in increasing index order.
6277  * Each item is stored as <object>.
6278  *
6279  * When we come here, SX_ARRAY has been read already.
6280  */
retrieve_array(pTHX_ stcxt_t * cxt,const char * cname)6281 static SV *retrieve_array(pTHX_ stcxt_t *cxt, const char *cname)
6282 {
6283     I32 len, i;
6284     AV *av;
6285     SV *sv;
6286     HV *stash;
6287     bool seen_null = FALSE;
6288 
6289     TRACEME(("retrieve_array (#%d)", (int)cxt->tagnum));
6290 
6291     /*
6292      * Read length, and allocate array, then pre-extend it.
6293      */
6294 
6295     RLEN(len);
6296     TRACEME(("size = %d", (int)len));
6297     av = newAV();
6298     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6299     SEEN_NN(av, stash, 0); /* Will return if array not allocated nicely */
6300     if (len)
6301         av_extend(av, len);
6302     else
6303         return (SV *) av;	/* No data follow if array is empty */
6304 
6305     /*
6306      * Now get each item in turn...
6307      */
6308 
6309     for (i = 0; i < len; i++) {
6310         TRACEME(("(#%d) item", (int)i));
6311         sv = retrieve(aTHX_ cxt, 0);	/* Retrieve item */
6312         if (!sv)
6313             return (SV *) 0;
6314         if (sv == &PL_sv_undef) {
6315             seen_null = TRUE;
6316             continue;
6317         }
6318         if (sv == &PL_sv_placeholder)
6319             sv = &PL_sv_undef;
6320         if (av_store(av, i, sv) == 0)
6321             return (SV *) 0;
6322     }
6323     if (seen_null) av_fill(av, len-1);
6324 
6325     TRACEME(("ok (retrieve_array at 0x%" UVxf ")", PTR2UV(av)));
6326 
6327     return (SV *) av;
6328 }
6329 
6330 #ifdef HAS_U64
6331 
6332 /* internal method with len already read */
6333 
get_larray(pTHX_ stcxt_t * cxt,UV len,const char * cname)6334 static SV *get_larray(pTHX_ stcxt_t *cxt, UV len, const char *cname)
6335 {
6336     UV i;
6337     AV *av;
6338     SV *sv;
6339     HV *stash;
6340     bool seen_null = FALSE;
6341 
6342     TRACEME(("get_larray (#%d) %lu", (int)cxt->tagnum, (unsigned long)len));
6343 
6344     /*
6345      * allocate array, then pre-extend it.
6346      */
6347 
6348     av = newAV();
6349     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6350     SEEN_NN(av, stash, 0); /* Will return if array not allocated nicely */
6351     assert(len);
6352     av_extend(av, len);
6353 
6354     /*
6355      * Now get each item in turn...
6356      */
6357 
6358     for (i = 0; i < len; i++) {
6359         TRACEME(("(#%d) item", (int)i));
6360         sv = retrieve(aTHX_ cxt, 0);		/* Retrieve item */
6361         if (!sv)
6362             return (SV *) 0;
6363         if (sv == &PL_sv_undef) {
6364             seen_null = TRUE;
6365             continue;
6366         }
6367         if (sv == &PL_sv_placeholder)
6368             sv = &PL_sv_undef;
6369         if (av_store(av, i, sv) == 0)
6370             return (SV *) 0;
6371     }
6372     if (seen_null) av_fill(av, len-1);
6373 
6374     TRACEME(("ok (get_larray at 0x%" UVxf ")", PTR2UV(av)));
6375 
6376     return (SV *) av;
6377 }
6378 
6379 /*
6380  * get_lhash
6381  *
6382  * Retrieve a overlong hash table.
6383  * <len> is already read. What follows is each key/value pair, in random order.
6384  * Keys are stored as <length> <data>, the <data> section being omitted
6385  * if length is 0.
6386  * Values are stored as <object>.
6387  *
6388  */
get_lhash(pTHX_ stcxt_t * cxt,UV len,int hash_flags,const char * cname)6389 static SV *get_lhash(pTHX_ stcxt_t *cxt, UV len, int hash_flags, const char *cname)
6390 {
6391     UV size;
6392     UV i;
6393     HV *hv;
6394     SV *sv;
6395     HV *stash;
6396 
6397     TRACEME(("get_lhash (#%d)", (int)cxt->tagnum));
6398 
6399 #ifdef HAS_RESTRICTED_HASHES
6400     PERL_UNUSED_ARG(hash_flags);
6401 #else
6402     if (hash_flags & SHV_RESTRICTED) {
6403         if (cxt->derestrict < 0)
6404             cxt->derestrict = (SvTRUE
6405                 (get_sv("Storable::downgrade_restricted", GV_ADD))
6406                                ? 1 : 0);
6407         if (cxt->derestrict == 0)
6408             RESTRICTED_HASH_CROAK();
6409     }
6410 #endif
6411 
6412     TRACEME(("size = %lu", (unsigned long)len));
6413     hv = newHV();
6414     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6415     SEEN_NN(hv, stash, 0);	/* Will return if table not allocated properly */
6416     if (len == 0)
6417         return (SV *) hv;	/* No data follow if table empty */
6418     TRACEME(("split %lu", (unsigned long)len+1));
6419     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6420 
6421     /*
6422      * Now get each key/value pair in turn...
6423      */
6424 
6425     for (i = 0; i < len; i++) {
6426         /*
6427          * Get value first.
6428          */
6429 
6430         TRACEME(("(#%d) value", (int)i));
6431         sv = retrieve(aTHX_ cxt, 0);
6432         if (!sv)
6433             return (SV *) 0;
6434 
6435         /*
6436          * Get key.
6437          * Since we're reading into kbuf, we must ensure we're not
6438          * recursing between the read and the hv_store() where it's used.
6439          * Hence the key comes after the value.
6440          */
6441 
6442         RLEN(size);		/* Get key size */
6443         KBUFCHK((STRLEN)size);	/* Grow hash key read pool if needed */
6444         if (size)
6445             READ(kbuf, size);
6446         kbuf[size] = '\0';	/* Mark string end, just in case */
6447         TRACEME(("(#%d) key '%s'", (int)i, kbuf));
6448 
6449         /*
6450          * Enter key/value pair into hash table.
6451          */
6452 
6453         if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
6454             return (SV *) 0;
6455     }
6456 
6457     TRACEME(("ok (get_lhash at 0x%" UVxf ")", PTR2UV(hv)));
6458     return (SV *) hv;
6459 }
6460 #endif
6461 
6462 /*
6463  * retrieve_hash
6464  *
6465  * Retrieve a whole hash table.
6466  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
6467  * Keys are stored as <length> <data>, the <data> section being omitted
6468  * if length is 0.
6469  * Values are stored as <object>.
6470  *
6471  * When we come here, SX_HASH has been read already.
6472  */
retrieve_hash(pTHX_ stcxt_t * cxt,const char * cname)6473 static SV *retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname)
6474 {
6475     I32 len;
6476     I32 size;
6477     I32 i;
6478     HV *hv;
6479     SV *sv;
6480     HV *stash;
6481 
6482     TRACEME(("retrieve_hash (#%d)", (int)cxt->tagnum));
6483 
6484     /*
6485      * Read length, allocate table.
6486      */
6487 
6488     RLEN(len);
6489     TRACEME(("size = %d", (int)len));
6490     hv = newHV();
6491     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6492     SEEN_NN(hv, stash, 0);	/* Will return if table not allocated properly */
6493     if (len == 0)
6494         return (SV *) hv;	/* No data follow if table empty */
6495     TRACEME(("split %d", (int)len+1));
6496     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6497 
6498     /*
6499      * Now get each key/value pair in turn...
6500      */
6501 
6502     for (i = 0; i < len; i++) {
6503         /*
6504          * Get value first.
6505          */
6506 
6507         TRACEME(("(#%d) value", (int)i));
6508         sv = retrieve(aTHX_ cxt, 0);
6509         if (!sv)
6510             return (SV *) 0;
6511 
6512         /*
6513          * Get key.
6514          * Since we're reading into kbuf, we must ensure we're not
6515          * recursing between the read and the hv_store() where it's used.
6516          * Hence the key comes after the value.
6517          */
6518 
6519         RLEN(size);		/* Get key size */
6520         KBUFCHK((STRLEN)size);	/* Grow hash key read pool if needed */
6521         if (size)
6522             READ(kbuf, size);
6523         kbuf[size] = '\0';	/* Mark string end, just in case */
6524         TRACEME(("(#%d) key '%s'", (int)i, kbuf));
6525 
6526         /*
6527          * Enter key/value pair into hash table.
6528          */
6529 
6530         if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
6531             return (SV *) 0;
6532     }
6533 
6534     TRACEME(("ok (retrieve_hash at 0x%" UVxf ")", PTR2UV(hv)));
6535 
6536     return (SV *) hv;
6537 }
6538 
6539 /*
6540  * retrieve_hash
6541  *
6542  * Retrieve a whole hash table.
6543  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
6544  * Keys are stored as <length> <data>, the <data> section being omitted
6545  * if length is 0.
6546  * Values are stored as <object>.
6547  *
6548  * When we come here, SX_HASH has been read already.
6549  */
retrieve_flag_hash(pTHX_ stcxt_t * cxt,const char * cname)6550 static SV *retrieve_flag_hash(pTHX_ stcxt_t *cxt, const char *cname)
6551 {
6552     dVAR;
6553     I32 len;
6554     I32 size;
6555     I32 i;
6556     HV *hv;
6557     SV *sv;
6558     HV *stash;
6559     int hash_flags;
6560 
6561     GETMARK(hash_flags);
6562     TRACEME(("retrieve_flag_hash (#%d)", (int)cxt->tagnum));
6563     /*
6564      * Read length, allocate table.
6565      */
6566 
6567 #ifndef HAS_RESTRICTED_HASHES
6568     if (hash_flags & SHV_RESTRICTED) {
6569         if (cxt->derestrict < 0)
6570             cxt->derestrict = (SvTRUE
6571                 (get_sv("Storable::downgrade_restricted", GV_ADD))
6572                                ? 1 : 0);
6573         if (cxt->derestrict == 0)
6574             RESTRICTED_HASH_CROAK();
6575     }
6576 #endif
6577 
6578     RLEN(len);
6579     TRACEME(("size = %d, flags = %d", (int)len, hash_flags));
6580     hv = newHV();
6581     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6582     SEEN_NN(hv, stash, 0);	/* Will return if table not allocated properly */
6583     if (len == 0)
6584         return (SV *) hv;	/* No data follow if table empty */
6585     TRACEME(("split %d", (int)len+1));
6586     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6587 
6588     /*
6589      * Now get each key/value pair in turn...
6590      */
6591 
6592     for (i = 0; i < len; i++) {
6593         int flags;
6594         int store_flags = 0;
6595         /*
6596          * Get value first.
6597          */
6598 
6599         TRACEME(("(#%d) value", (int)i));
6600         sv = retrieve(aTHX_ cxt, 0);
6601         if (!sv)
6602             return (SV *) 0;
6603 
6604         GETMARK(flags);
6605 #ifdef HAS_RESTRICTED_HASHES
6606         if ((hash_flags & SHV_RESTRICTED) && (flags & SHV_K_LOCKED))
6607             SvREADONLY_on(sv);
6608 #endif
6609 
6610         if (flags & SHV_K_ISSV) {
6611             /* XXX you can't set a placeholder with an SV key.
6612                Then again, you can't get an SV key.
6613                Without messing around beyond what the API is supposed to do.
6614             */
6615             SV *keysv;
6616             TRACEME(("(#%d) keysv, flags=%d", (int)i, flags));
6617             keysv = retrieve(aTHX_ cxt, 0);
6618             if (!keysv)
6619                 return (SV *) 0;
6620 
6621             if (!hv_store_ent(hv, keysv, sv, 0))
6622                 return (SV *) 0;
6623         } else {
6624             /*
6625              * Get key.
6626              * Since we're reading into kbuf, we must ensure we're not
6627              * recursing between the read and the hv_store() where it's used.
6628              * Hence the key comes after the value.
6629              */
6630 
6631             if (flags & SHV_K_PLACEHOLDER) {
6632                 SvREFCNT_dec (sv);
6633                 sv = &PL_sv_placeholder;
6634                 store_flags |= HVhek_PLACEHOLD;
6635             }
6636             if (flags & SHV_K_UTF8) {
6637 #ifdef HAS_UTF8_HASHES
6638                 store_flags |= HVhek_UTF8;
6639 #else
6640                 if (cxt->use_bytes < 0)
6641                     cxt->use_bytes
6642                         = (SvTRUE(get_sv("Storable::drop_utf8", GV_ADD))
6643                            ? 1 : 0);
6644                 if (cxt->use_bytes == 0)
6645                     UTF8_CROAK();
6646 #endif
6647             }
6648 #ifdef HAS_UTF8_HASHES
6649             if (flags & SHV_K_WASUTF8)
6650                 store_flags |= HVhek_WASUTF8;
6651 #endif
6652 
6653             RLEN(size);		/* Get key size */
6654             KBUFCHK((STRLEN)size);/* Grow hash key read pool if needed */
6655             if (size)
6656                 READ(kbuf, size);
6657             kbuf[size] = '\0';	/* Mark string end, just in case */
6658             TRACEME(("(#%d) key '%s' flags %X store_flags %X", (int)i, kbuf,
6659                      flags, store_flags));
6660 
6661             /*
6662              * Enter key/value pair into hash table.
6663              */
6664 
6665 #ifdef HAS_RESTRICTED_HASHES
6666             if (hv_store_flags(hv, kbuf, size, sv, 0, store_flags) == 0)
6667                 return (SV *) 0;
6668 #else
6669             if (!(store_flags & HVhek_PLACEHOLD))
6670                 if (hv_store(hv, kbuf, size, sv, 0) == 0)
6671                     return (SV *) 0;
6672 #endif
6673         }
6674     }
6675 #ifdef HAS_RESTRICTED_HASHES
6676     if (hash_flags & SHV_RESTRICTED)
6677         SvREADONLY_on(hv);
6678 #endif
6679 
6680     TRACEME(("ok (retrieve_hash at 0x%" UVxf ")", PTR2UV(hv)));
6681 
6682     return (SV *) hv;
6683 }
6684 
6685 /*
6686  * retrieve_code
6687  *
6688  * Return a code reference.
6689  */
retrieve_code(pTHX_ stcxt_t * cxt,const char * cname)6690 static SV *retrieve_code(pTHX_ stcxt_t *cxt, const char *cname)
6691 {
6692 #if PERL_VERSION < 6
6693     CROAK(("retrieve_code does not work with perl 5.005 or less\n"));
6694 #else
6695     dSP;
6696     I32 type, count;
6697     IV tagnum;
6698     SV *cv;
6699     SV *sv, *text, *sub, *errsv;
6700     HV *stash;
6701 
6702     TRACEME(("retrieve_code (#%d)", (int)cxt->tagnum));
6703 
6704     /*
6705      *  Insert dummy SV in the aseen array so that we don't screw
6706      *  up the tag numbers.  We would just make the internal
6707      *  scalar an untagged item in the stream, but
6708      *  retrieve_scalar() calls SEEN().  So we just increase the
6709      *  tag number.
6710      */
6711     tagnum = cxt->tagnum;
6712     sv = newSViv(0);
6713     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6714     SEEN_NN(sv, stash, 0);
6715 
6716     /*
6717      * Retrieve the source of the code reference
6718      * as a small or large scalar
6719      */
6720 
6721     GETMARK(type);
6722     switch (type) {
6723     case SX_SCALAR:
6724         text = retrieve_scalar(aTHX_ cxt, cname);
6725         break;
6726     case SX_LSCALAR:
6727         text = retrieve_lscalar(aTHX_ cxt, cname);
6728         break;
6729     case SX_UTF8STR:
6730         text = retrieve_utf8str(aTHX_ cxt, cname);
6731         break;
6732     case SX_LUTF8STR:
6733         text = retrieve_lutf8str(aTHX_ cxt, cname);
6734         break;
6735     default:
6736         CROAK(("Unexpected type %d in retrieve_code\n", (int)type));
6737     }
6738 
6739     if (!text) {
6740         CROAK(("Unable to retrieve code\n"));
6741     }
6742 
6743     /*
6744      * prepend "sub " to the source
6745      */
6746 
6747     sub = newSVpvs("sub ");
6748     if (SvUTF8(text))
6749         SvUTF8_on(sub);
6750     sv_catpv(sub, SvPV_nolen(text)); /* XXX no sv_catsv! */
6751     SvREFCNT_dec(text);
6752 
6753     /*
6754      * evaluate the source to a code reference and use the CV value
6755      */
6756 
6757     if (cxt->eval == NULL) {
6758         cxt->eval = get_sv("Storable::Eval", GV_ADD);
6759         SvREFCNT_inc(cxt->eval);
6760     }
6761     if (!SvTRUE(cxt->eval)) {
6762         if (cxt->forgive_me == 0 ||
6763             (cxt->forgive_me < 0 &&
6764              !(cxt->forgive_me = SvTRUE
6765                (get_sv("Storable::forgive_me", GV_ADD)) ? 1 : 0))
6766             ) {
6767             CROAK(("Can't eval, please set $Storable::Eval to a true value"));
6768         } else {
6769             sv = newSVsv(sub);
6770             /* fix up the dummy entry... */
6771             av_store(cxt->aseen, tagnum, SvREFCNT_inc(sv));
6772             return sv;
6773         }
6774     }
6775 
6776     ENTER;
6777     SAVETMPS;
6778 
6779     errsv = get_sv("@", GV_ADD);
6780     SvPVCLEAR(errsv);	/* clear $@ */
6781     if (SvROK(cxt->eval) && SvTYPE(SvRV(cxt->eval)) == SVt_PVCV) {
6782         PUSHMARK(sp);
6783         XPUSHs(sv_2mortal(newSVsv(sub)));
6784         PUTBACK;
6785         count = call_sv(cxt->eval, G_SCALAR);
6786         if (count != 1)
6787             CROAK(("Unexpected return value from $Storable::Eval callback\n"));
6788     } else {
6789         eval_sv(sub, G_SCALAR);
6790     }
6791     SPAGAIN;
6792     cv = POPs;
6793     PUTBACK;
6794 
6795     if (SvTRUE(errsv)) {
6796         CROAK(("code %s caused an error: %s",
6797                SvPV_nolen(sub), SvPV_nolen(errsv)));
6798     }
6799 
6800     if (cv && SvROK(cv) && SvTYPE(SvRV(cv)) == SVt_PVCV) {
6801         sv = SvRV(cv);
6802     } else {
6803         CROAK(("code %s did not evaluate to a subroutine reference\n",
6804                SvPV_nolen(sub)));
6805     }
6806 
6807     SvREFCNT_inc(sv); /* XXX seems to be necessary */
6808     SvREFCNT_dec(sub);
6809 
6810     FREETMPS;
6811     LEAVE;
6812     /* fix up the dummy entry... */
6813     av_store(cxt->aseen, tagnum, SvREFCNT_inc(sv));
6814 
6815     return sv;
6816 #endif
6817 }
6818 
retrieve_regexp(pTHX_ stcxt_t * cxt,const char * cname)6819 static SV *retrieve_regexp(pTHX_ stcxt_t *cxt, const char *cname) {
6820 #if PERL_VERSION >= 8
6821     int op_flags;
6822     U32 re_len;
6823     STRLEN flags_len;
6824     SV *re;
6825     SV *flags;
6826     SV *re_ref;
6827     SV *sv;
6828     dSP;
6829     I32 count;
6830     HV *stash;
6831 
6832     ENTER;
6833     SAVETMPS;
6834 
6835     GETMARK(op_flags);
6836     if (op_flags & SHR_U32_RE_LEN) {
6837         RLEN(re_len);
6838     }
6839     else
6840         GETMARK(re_len);
6841 
6842     re = sv_2mortal(NEWSV(10002, re_len ? re_len : 1));
6843     READ(SvPVX(re), re_len);
6844     SvCUR_set(re, re_len);
6845     *SvEND(re) = '\0';
6846     SvPOK_only(re);
6847 
6848     GETMARK(flags_len);
6849     flags = sv_2mortal(NEWSV(10002, flags_len ? flags_len : 1));
6850     READ(SvPVX(flags), flags_len);
6851     SvCUR_set(flags, flags_len);
6852     *SvEND(flags) = '\0';
6853     SvPOK_only(flags);
6854 
6855     PUSHMARK(SP);
6856 
6857     XPUSHs(re);
6858     XPUSHs(flags);
6859 
6860     PUTBACK;
6861 
6862     count = call_pv("Storable::_make_re", G_SCALAR);
6863 
6864     SPAGAIN;
6865 
6866     if (count != 1)
6867         CROAK(("Bad count %d calling _make_re", (int)count));
6868 
6869     re_ref = POPs;
6870 
6871     PUTBACK;
6872 
6873     if (!SvROK(re_ref))
6874       CROAK(("_make_re didn't return a reference"));
6875 
6876     sv = SvRV(re_ref);
6877     SvREFCNT_inc(sv);
6878     stash = cname ? gv_stashpv(cname, GV_ADD) : 0;
6879     SEEN_NN(sv, stash, 0);
6880 
6881     FREETMPS;
6882     LEAVE;
6883 
6884     return sv;
6885 #else
6886     CROAK(("retrieve_regexp does not work with 5.6 or earlier"));
6887 #endif
6888 }
6889 
6890 /*
6891  * old_retrieve_array
6892  *
6893  * Retrieve a whole array in pre-0.6 binary format.
6894  *
6895  * Layout is SX_ARRAY <size> followed by each item, in increasing index order.
6896  * Each item is stored as SX_ITEM <object> or SX_IT_UNDEF for "holes".
6897  *
6898  * When we come here, SX_ARRAY has been read already.
6899  */
old_retrieve_array(pTHX_ stcxt_t * cxt,const char * cname)6900 static SV *old_retrieve_array(pTHX_ stcxt_t *cxt, const char *cname)
6901 {
6902     I32 len;
6903     I32 i;
6904     AV *av;
6905     SV *sv;
6906     int c;
6907 
6908     PERL_UNUSED_ARG(cname);
6909     TRACEME(("old_retrieve_array (#%d)", (int)cxt->tagnum));
6910 
6911     /*
6912      * Read length, and allocate array, then pre-extend it.
6913      */
6914 
6915     RLEN(len);
6916     TRACEME(("size = %d", (int)len));
6917     av = newAV();
6918     SEEN0_NN(av, 0);	/* Will return if array not allocated nicely */
6919     if (len)
6920         av_extend(av, len);
6921     else
6922         return (SV *) av;	/* No data follow if array is empty */
6923 
6924     /*
6925      * Now get each item in turn...
6926      */
6927 
6928     for (i = 0; i < len; i++) {
6929         GETMARK(c);
6930         if (c == SX_IT_UNDEF) {
6931             TRACEME(("(#%d) undef item", (int)i));
6932             continue;		/* av_extend() already filled us with undef */
6933         }
6934         if (c != SX_ITEM)
6935             (void) retrieve_other(aTHX_ cxt, 0);/* Will croak out */
6936         TRACEME(("(#%d) item", (int)i));
6937         sv = retrieve(aTHX_ cxt, 0);		/* Retrieve item */
6938         if (!sv)
6939             return (SV *) 0;
6940         if (av_store(av, i, sv) == 0)
6941             return (SV *) 0;
6942     }
6943 
6944     TRACEME(("ok (old_retrieve_array at 0x%" UVxf ")", PTR2UV(av)));
6945 
6946     return (SV *) av;
6947 }
6948 
6949 /*
6950  * old_retrieve_hash
6951  *
6952  * Retrieve a whole hash table in pre-0.6 binary format.
6953  *
6954  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
6955  * Keys are stored as SX_KEY <length> <data>, the <data> section being omitted
6956  * if length is 0.
6957  * Values are stored as SX_VALUE <object> or SX_VL_UNDEF for "holes".
6958  *
6959  * When we come here, SX_HASH has been read already.
6960  */
old_retrieve_hash(pTHX_ stcxt_t * cxt,const char * cname)6961 static SV *old_retrieve_hash(pTHX_ stcxt_t *cxt, const char *cname)
6962 {
6963     I32 len;
6964     I32 size;
6965     I32 i;
6966     HV *hv;
6967     SV *sv = (SV *) 0;
6968     int c;
6969     SV *sv_h_undef = (SV *) 0;	/* hv_store() bug */
6970 
6971     PERL_UNUSED_ARG(cname);
6972     TRACEME(("old_retrieve_hash (#%d)", (int)cxt->tagnum));
6973 
6974     /*
6975      * Read length, allocate table.
6976      */
6977 
6978     RLEN(len);
6979     TRACEME(("size = %d", (int)len));
6980     hv = newHV();
6981     SEEN0_NN(hv, 0);		/* Will return if table not allocated properly */
6982     if (len == 0)
6983         return (SV *) hv;	/* No data follow if table empty */
6984     TRACEME(("split %d", (int)len+1));
6985     hv_ksplit(hv, len+1);	/* pre-extend hash to save multiple splits */
6986 
6987     /*
6988      * Now get each key/value pair in turn...
6989      */
6990 
6991     for (i = 0; i < len; i++) {
6992         /*
6993          * Get value first.
6994          */
6995 
6996         GETMARK(c);
6997         if (c == SX_VL_UNDEF) {
6998             TRACEME(("(#%d) undef value", (int)i));
6999             /*
7000              * Due to a bug in hv_store(), it's not possible to pass
7001              * &PL_sv_undef to hv_store() as a value, otherwise the
7002              * associated key will not be creatable any more. -- RAM, 14/01/97
7003              */
7004             if (!sv_h_undef)
7005                 sv_h_undef = newSVsv(&PL_sv_undef);
7006             sv = SvREFCNT_inc(sv_h_undef);
7007         } else if (c == SX_VALUE) {
7008             TRACEME(("(#%d) value", (int)i));
7009             sv = retrieve(aTHX_ cxt, 0);
7010             if (!sv)
7011                 return (SV *) 0;
7012         } else
7013             (void) retrieve_other(aTHX_ cxt, 0); /* Will croak out */
7014 
7015         /*
7016          * Get key.
7017          * Since we're reading into kbuf, we must ensure we're not
7018          * recursing between the read and the hv_store() where it's used.
7019          * Hence the key comes after the value.
7020          */
7021 
7022         GETMARK(c);
7023         if (c != SX_KEY)
7024             (void) retrieve_other(aTHX_ cxt, 0); /* Will croak out */
7025         RLEN(size);				/* Get key size */
7026         KBUFCHK((STRLEN)size);			/* Grow hash key read pool if needed */
7027         if (size)
7028             READ(kbuf, size);
7029         kbuf[size] = '\0';			/* Mark string end, just in case */
7030         TRACEME(("(#%d) key '%s'", (int)i, kbuf));
7031 
7032         /*
7033          * Enter key/value pair into hash table.
7034          */
7035 
7036         if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
7037             return (SV *) 0;
7038     }
7039 
7040     TRACEME(("ok (retrieve_hash at 0x%" UVxf ")", PTR2UV(hv)));
7041 
7042     return (SV *) hv;
7043 }
7044 
7045 /***
7046  *** Retrieval engine.
7047  ***/
7048 
7049 /*
7050  * magic_check
7051  *
7052  * Make sure the stored data we're trying to retrieve has been produced
7053  * on an ILP compatible system with the same byteorder. It croaks out in
7054  * case an error is detected. [ILP = integer-long-pointer sizes]
7055  * Returns null if error is detected, &PL_sv_undef otherwise.
7056  *
7057  * Note that there's no byte ordering info emitted when network order was
7058  * used at store time.
7059  */
magic_check(pTHX_ stcxt_t * cxt)7060 static SV *magic_check(pTHX_ stcxt_t *cxt)
7061 {
7062     /* The worst case for a malicious header would be old magic (which is
7063        longer), major, minor, byteorder length byte of 255, 255 bytes of
7064        garbage, sizeof int, long, pointer, NV.
7065        So the worse of that we can read is 255 bytes of garbage plus 4.
7066        Err, I am assuming 8 bit bytes here. Please file a bug report if you're
7067        compiling perl on a system with chars that are larger than 8 bits.
7068        (Even Crays aren't *that* perverse).
7069     */
7070     unsigned char buf[4 + 255];
7071     unsigned char *current;
7072     int c;
7073     int length;
7074     int use_network_order;
7075     int use_NV_size;
7076     int old_magic = 0;
7077     int version_major;
7078     int version_minor = 0;
7079 
7080     TRACEME(("magic_check"));
7081 
7082     /*
7083      * The "magic number" is only for files, not when freezing in memory.
7084      */
7085 
7086     if (cxt->fio) {
7087         /* This includes the '\0' at the end.  I want to read the extra byte,
7088            which is usually going to be the major version number.  */
7089         STRLEN len = sizeof(magicstr);
7090         STRLEN old_len;
7091 
7092         READ(buf, (SSize_t)(len));	/* Not null-terminated */
7093 
7094         /* Point at the byte after the byte we read.  */
7095         current = buf + --len;	/* Do the -- outside of macros.  */
7096 
7097         if (memNE(buf, magicstr, len)) {
7098             /*
7099              * Try to read more bytes to check for the old magic number, which
7100              * was longer.
7101              */
7102 
7103             TRACEME(("trying for old magic number"));
7104 
7105             old_len = sizeof(old_magicstr) - 1;
7106             READ(current + 1, (SSize_t)(old_len - len));
7107 
7108             if (memNE(buf, old_magicstr, old_len))
7109                 CROAK(("File is not a perl storable"));
7110             old_magic++;
7111             current = buf + old_len;
7112         }
7113         use_network_order = *current;
7114     } else {
7115         GETMARK(use_network_order);
7116     }
7117 
7118     /*
7119      * Starting with 0.6, the "use_network_order" byte flag is also used to
7120      * indicate the version number of the binary, and therefore governs the
7121      * setting of sv_retrieve_vtbl. See magic_write().
7122      */
7123     if (old_magic && use_network_order > 1) {
7124         /*  0.1 dump - use_network_order is really byte order length */
7125         version_major = -1;
7126     }
7127     else {
7128         version_major = use_network_order >> 1;
7129     }
7130     cxt->retrieve_vtbl = (SV*(**)(pTHX_ stcxt_t *cxt, const char *cname)) (version_major > 0 ? sv_retrieve : sv_old_retrieve);
7131 
7132     TRACEME(("magic_check: netorder = 0x%x", use_network_order));
7133 
7134 
7135     /*
7136      * Starting with 0.7 (binary major 2), a full byte is dedicated to the
7137      * minor version of the protocol.  See magic_write().
7138      */
7139 
7140     if (version_major > 1)
7141         GETMARK(version_minor);
7142 
7143     cxt->ver_major = version_major;
7144     cxt->ver_minor = version_minor;
7145 
7146     TRACEME(("binary image version is %d.%d", version_major, version_minor));
7147 
7148     /*
7149      * Inter-operability sanity check: we can't retrieve something stored
7150      * using a format more recent than ours, because we have no way to
7151      * know what has changed, and letting retrieval go would mean a probable
7152      * failure reporting a "corrupted" storable file.
7153      */
7154 
7155     if (
7156         version_major > STORABLE_BIN_MAJOR ||
7157         (version_major == STORABLE_BIN_MAJOR &&
7158          version_minor > STORABLE_BIN_MINOR)
7159         ) {
7160         int croak_now = 1;
7161         TRACEME(("but I am version is %d.%d", STORABLE_BIN_MAJOR,
7162                  STORABLE_BIN_MINOR));
7163 
7164         if (version_major == STORABLE_BIN_MAJOR) {
7165             TRACEME(("cxt->accept_future_minor is %d",
7166                      cxt->accept_future_minor));
7167             if (cxt->accept_future_minor < 0)
7168                 cxt->accept_future_minor
7169                     = (SvTRUE(get_sv("Storable::accept_future_minor",
7170                                           GV_ADD))
7171                        ? 1 : 0);
7172             if (cxt->accept_future_minor == 1)
7173                 croak_now = 0;  /* Don't croak yet.  */
7174         }
7175         if (croak_now) {
7176             CROAK(("Storable binary image v%d.%d more recent than I am (v%d.%d)",
7177                    version_major, version_minor,
7178                    STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
7179         }
7180     }
7181 
7182     /*
7183      * If they stored using network order, there's no byte ordering
7184      * information to check.
7185      */
7186 
7187     if ((cxt->netorder = (use_network_order & 0x1)))	/* Extra () for -Wall */
7188         return &PL_sv_undef;			/* No byte ordering info */
7189 
7190     /* In C truth is 1, falsehood is 0. Very convenient.  */
7191     use_NV_size = version_major >= 2 && version_minor >= 2;
7192 
7193     if (version_major >= 0) {
7194         GETMARK(c);
7195     }
7196     else {
7197         c = use_network_order;
7198     }
7199     length = c + 3 + use_NV_size;
7200     READ(buf, length);	/* Not null-terminated */
7201 
7202     TRACEME(("byte order '%.*s' %d", c, buf, c));
7203 
7204 #ifdef USE_56_INTERWORK_KLUDGE
7205     /* No point in caching this in the context as we only need it once per
7206        retrieve, and we need to recheck it each read.  */
7207     if (SvTRUE(get_sv("Storable::interwork_56_64bit", GV_ADD))) {
7208         if ((c != (sizeof (byteorderstr_56) - 1))
7209             || memNE(buf, byteorderstr_56, c))
7210             CROAK(("Byte order is not compatible"));
7211     } else
7212 #endif
7213     {
7214         if ((c != (sizeof (byteorderstr) - 1))
7215           || memNE(buf, byteorderstr, c))
7216             CROAK(("Byte order is not compatible"));
7217     }
7218 
7219     current = buf + c;
7220 
7221     /* sizeof(int) */
7222     if ((int) *current++ != sizeof(int))
7223         CROAK(("Integer size is not compatible"));
7224 
7225     /* sizeof(long) */
7226     if ((int) *current++ != sizeof(long))
7227         CROAK(("Long integer size is not compatible"));
7228 
7229     /* sizeof(char *) */
7230     if ((int) *current != sizeof(char *))
7231         CROAK(("Pointer size is not compatible"));
7232 
7233     if (use_NV_size) {
7234         /* sizeof(NV) */
7235         if ((int) *++current != sizeof(NV))
7236             CROAK(("Double size is not compatible"));
7237     }
7238 
7239     return &PL_sv_undef;	/* OK */
7240 }
7241 
7242 /*
7243  * retrieve
7244  *
7245  * Recursively retrieve objects from the specified file and return their
7246  * root SV (which may be an AV or an HV for what we care).
7247  * Returns null if there is a problem.
7248  */
retrieve(pTHX_ stcxt_t * cxt,const char * cname)7249 static SV *retrieve(pTHX_ stcxt_t *cxt, const char *cname)
7250 {
7251     int type;
7252     SV **svh;
7253     SV *sv;
7254 
7255     TRACEME(("retrieve"));
7256 
7257     /*
7258      * Grab address tag which identifies the object if we are retrieving
7259      * an older format. Since the new binary format counts objects and no
7260      * longer explicitly tags them, we must keep track of the correspondence
7261      * ourselves.
7262      *
7263      * The following section will disappear one day when the old format is
7264      * no longer supported, hence the final "goto" in the "if" block.
7265      */
7266 
7267     if (cxt->hseen) {			/* Retrieving old binary */
7268         stag_t tag;
7269         if (cxt->netorder) {
7270             I32 nettag;
7271             READ(&nettag, sizeof(I32));	/* Ordered sequence of I32 */
7272             tag = (stag_t) nettag;
7273         } else
7274             READ(&tag, sizeof(stag_t));	/* Original address of the SV */
7275 
7276         GETMARK(type);
7277         if (type == SX_OBJECT) {
7278             I32 tagn;
7279             svh = hv_fetch(cxt->hseen, (char *) &tag, sizeof(tag), FALSE);
7280             if (!svh)
7281                 CROAK(("Old tag 0x%" UVxf " should have been mapped already",
7282                        (UV) tag));
7283             tagn = SvIV(*svh);	/* Mapped tag number computed earlier below */
7284 
7285             /*
7286              * The following code is common with the SX_OBJECT case below.
7287              */
7288 
7289             svh = av_fetch(cxt->aseen, tagn, FALSE);
7290             if (!svh)
7291                 CROAK(("Object #%" IVdf " should have been retrieved already",
7292                        (IV) tagn));
7293             sv = *svh;
7294             TRACEME(("has retrieved #%d at 0x%" UVxf, (int)tagn, PTR2UV(sv)));
7295             SvREFCNT_inc(sv);	/* One more reference to this same sv */
7296             return sv;		/* The SV pointer where object was retrieved */
7297         }
7298 
7299         /*
7300          * Map new object, but don't increase tagnum. This will be done
7301          * by each of the retrieve_* functions when they call SEEN().
7302          *
7303          * The mapping associates the "tag" initially present with a unique
7304          * tag number. See test for SX_OBJECT above to see how this is perused.
7305          */
7306 
7307         if (!hv_store(cxt->hseen, (char *) &tag, sizeof(tag),
7308                       newSViv(cxt->tagnum), 0))
7309             return (SV *) 0;
7310 
7311         goto first_time;
7312     }
7313 
7314     /*
7315      * Regular post-0.6 binary format.
7316      */
7317 
7318     GETMARK(type);
7319 
7320     TRACEME(("retrieve type = %d", type));
7321 
7322     /*
7323      * Are we dealing with an object we should have already retrieved?
7324      */
7325 
7326     if (type == SX_OBJECT) {
7327         I32 tag;
7328         READ_I32(tag);
7329         tag = ntohl(tag);
7330 #ifndef HAS_U64
7331         /* A 32-bit system can't have over 2**31 objects anyway */
7332         if (tag < 0)
7333             CROAK(("Object #%" IVdf " out of range", (IV)tag));
7334 #endif
7335         /* Older versions of Storable on with 64-bit support on 64-bit
7336            systems can produce values above the 2G boundary (or wrapped above
7337            the 4G boundary, which we can't do much about), treat those as
7338            unsigned.
7339            This same commit stores tag ids over the 2G boundary as long tags
7340            since older Storables will mis-handle them as short tags.
7341          */
7342         svh = av_fetch(cxt->aseen, (U32)tag, FALSE);
7343         if (!svh)
7344             CROAK(("Object #%" IVdf " should have been retrieved already",
7345                    (IV) tag));
7346         sv = *svh;
7347         TRACEME(("had retrieved #%d at 0x%" UVxf, (int)tag, PTR2UV(sv)));
7348         SvREFCNT_inc(sv);	/* One more reference to this same sv */
7349         return sv;		/* The SV pointer where object was retrieved */
7350     } else if (type >= SX_LAST && cxt->ver_minor > STORABLE_BIN_MINOR) {
7351         if (cxt->accept_future_minor < 0)
7352             cxt->accept_future_minor
7353                 = (SvTRUE(get_sv("Storable::accept_future_minor",
7354                                       GV_ADD))
7355                    ? 1 : 0);
7356         if (cxt->accept_future_minor == 1) {
7357             CROAK(("Storable binary image v%d.%d contains data of type %d. "
7358                    "This Storable is v%d.%d and can only handle data types up to %d",
7359                    cxt->ver_major, cxt->ver_minor, type,
7360                    STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR, SX_LAST - 1));
7361         }
7362     }
7363 
7364  first_time:	/* Will disappear when support for old format is dropped */
7365 
7366     /*
7367      * Okay, first time through for this one.
7368      */
7369 
7370     sv = RETRIEVE(cxt, type)(aTHX_ cxt, cname);
7371     if (!sv)
7372         return (SV *) 0;		/* Failed */
7373 
7374     /*
7375      * Old binary formats (pre-0.7).
7376      *
7377      * Final notifications, ended by SX_STORED may now follow.
7378      * Currently, the only pertinent notification to apply on the
7379      * freshly retrieved object is either:
7380      *    SX_CLASS <char-len> <classname> for short classnames.
7381      *    SX_LG_CLASS <int-len> <classname> for larger one (rare!).
7382      * Class name is then read into the key buffer pool used by
7383      * hash table key retrieval.
7384      */
7385 
7386     if (cxt->ver_major < 2) {
7387         while ((type = GETCHAR()) != SX_STORED) {
7388             I32 len;
7389             HV* stash;
7390             switch (type) {
7391             case SX_CLASS:
7392                 GETMARK(len);		/* Length coded on a single char */
7393                 break;
7394             case SX_LG_CLASS:		/* Length coded on a regular integer */
7395                 RLEN(len);
7396                 break;
7397             case EOF:
7398             default:
7399                 return (SV *) 0;	/* Failed */
7400             }
7401             KBUFCHK((STRLEN)len);	/* Grow buffer as necessary */
7402             if (len)
7403                 READ(kbuf, len);
7404             kbuf[len] = '\0';		/* Mark string end */
7405             stash = gv_stashpvn(kbuf, len, GV_ADD);
7406             BLESS(sv, stash);
7407         }
7408     }
7409 
7410     TRACEME(("ok (retrieved 0x%" UVxf ", refcnt=%d, %s)", PTR2UV(sv),
7411              (int)SvREFCNT(sv) - 1, sv_reftype(sv, FALSE)));
7412 
7413     return sv;	/* Ok */
7414 }
7415 
7416 /*
7417  * do_retrieve
7418  *
7419  * Retrieve data held in file and return the root object.
7420  * Common routine for pretrieve and mretrieve.
7421  */
do_retrieve(pTHX_ PerlIO * f,SV * in,int optype,int flags)7422 static SV *do_retrieve(
7423                        pTHX_
7424                        PerlIO *f,
7425                        SV *in,
7426                        int optype,
7427                        int flags)
7428 {
7429     dSTCXT;
7430     SV *sv;
7431     int is_tainted;		/* Is input source tainted? */
7432     int pre_06_fmt = 0;		/* True with pre Storable 0.6 formats */
7433 
7434     TRACEMED(("do_retrieve (optype = 0x%x, flags=0x%x)",
7435 	     (unsigned)optype, (unsigned)flags));
7436 
7437     optype |= ST_RETRIEVE;
7438     cxt->flags = flags;
7439 
7440     /*
7441      * Sanity assertions for retrieve dispatch tables.
7442      */
7443 
7444     ASSERT(sizeof(sv_old_retrieve) == sizeof(sv_retrieve),
7445            ("old and new retrieve dispatch table have same size"));
7446     ASSERT(sv_old_retrieve[(int)SX_LAST] == retrieve_other,
7447            ("SX_LAST entry correctly initialized in old dispatch table"));
7448     ASSERT(sv_retrieve[(int)SX_LAST] == retrieve_other,
7449            ("SX_LAST entry correctly initialized in new dispatch table"));
7450 
7451     /*
7452      * Workaround for CROAK leak: if they enter with a "dirty" context,
7453      * free up memory for them now.
7454      */
7455 
7456     assert(cxt);
7457     if (cxt->s_dirty)
7458         clean_context(aTHX_ cxt);
7459 
7460     /*
7461      * Now that STORABLE_xxx hooks exist, it is possible that they try to
7462      * re-enter retrieve() via the hooks.
7463      */
7464 
7465     if (cxt->entry) {
7466         cxt = allocate_context(aTHX_ cxt);
7467         cxt->flags = flags;
7468     }
7469     INIT_TRACEME;
7470 
7471     cxt->entry++;
7472 
7473     ASSERT(cxt->entry == 1, ("starting new recursion"));
7474     ASSERT(!cxt->s_dirty, ("clean context"));
7475 
7476     /*
7477      * Prepare context.
7478      *
7479      * Data is loaded into the memory buffer when f is NULL, unless 'in' is
7480      * also NULL, in which case we're expecting the data to already lie
7481      * in the buffer (dclone case).
7482      */
7483 
7484     KBUFINIT();			 /* Allocate hash key reading pool once */
7485 
7486     if (!f && in) {
7487 #ifdef SvUTF8_on
7488         if (SvUTF8(in)) {
7489             STRLEN length;
7490             const char *orig = SvPV(in, length);
7491             char *asbytes;
7492             /* This is quite deliberate. I want the UTF8 routines
7493                to encounter the '\0' which perl adds at the end
7494                of all scalars, so that any new string also has
7495                this.
7496             */
7497             STRLEN klen_tmp = length + 1;
7498             bool is_utf8 = TRUE;
7499 
7500             /* Just casting the &klen to (STRLEN) won't work
7501                well if STRLEN and I32 are of different widths.
7502                --jhi */
7503             asbytes = (char*)bytes_from_utf8((U8*)orig,
7504                                              &klen_tmp,
7505                                              &is_utf8);
7506             if (is_utf8) {
7507                 CROAK(("Frozen string corrupt - contains characters outside 0-255"));
7508             }
7509             if (asbytes != orig) {
7510                 /* String has been converted.
7511                    There is no need to keep any reference to
7512                    the old string.  */
7513                 in = sv_newmortal();
7514                 /* We donate the SV the malloc()ed string
7515                    bytes_from_utf8 returned us.  */
7516                 SvUPGRADE(in, SVt_PV);
7517                 SvPOK_on(in);
7518                 SvPV_set(in, asbytes);
7519                 SvLEN_set(in, klen_tmp);
7520                 SvCUR_set(in, klen_tmp - 1);
7521             }
7522         }
7523 #endif
7524         MBUF_SAVE_AND_LOAD(in);
7525     }
7526 
7527     /*
7528      * Magic number verifications.
7529      *
7530      * This needs to be done before calling init_retrieve_context()
7531      * since the format indication in the file are necessary to conduct
7532      * some of the initializations.
7533      */
7534 
7535     cxt->fio = f;			/* Where I/O are performed */
7536 
7537     if (!magic_check(aTHX_ cxt))
7538         CROAK(("Magic number checking on storable %s failed",
7539                cxt->fio ? "file" : "string"));
7540 
7541     TRACEME(("data stored in %s format",
7542              cxt->netorder ? "net order" : "native"));
7543 
7544     /*
7545      * Check whether input source is tainted, so that we don't wrongly
7546      * taint perfectly good values...
7547      *
7548      * We assume file input is always tainted.  If both 'f' and 'in' are
7549      * NULL, then we come from dclone, and tainted is already filled in
7550      * the context.  That's a kludge, but the whole dclone() thing is
7551      * already quite a kludge anyway! -- RAM, 15/09/2000.
7552      */
7553 
7554     is_tainted = f ? 1 : (in ? SvTAINTED(in) : cxt->s_tainted);
7555     TRACEME(("input source is %s", is_tainted ? "tainted" : "trusted"));
7556     init_retrieve_context(aTHX_ cxt, optype, is_tainted);
7557 
7558     ASSERT(is_retrieving(aTHX), ("within retrieve operation"));
7559 
7560     sv = retrieve(aTHX_ cxt, 0); /* Recursively retrieve object, get root SV */
7561 
7562     /*
7563      * Final cleanup.
7564      */
7565 
7566     if (!f && in)
7567         MBUF_RESTORE();
7568 
7569     pre_06_fmt = cxt->hseen != NULL;	/* Before we clean context */
7570 
7571     /*
7572      * The "root" context is never freed.
7573      */
7574 
7575     clean_retrieve_context(aTHX_ cxt);
7576     if (cxt->prev)			/* This context was stacked */
7577         free_context(aTHX_ cxt);	/* It was not the "root" context */
7578 
7579     /*
7580      * Prepare returned value.
7581      */
7582 
7583     if (!sv) {
7584         TRACEMED(("retrieve ERROR"));
7585 #if (PATCHLEVEL <= 4)
7586         /* perl 5.00405 seems to screw up at this point with an
7587            'attempt to modify a read only value' error reported in the
7588            eval { $self = pretrieve(*FILE) } in _retrieve.
7589            I can't see what the cause of this error is, but I suspect a
7590            bug in 5.004, as it seems to be capable of issuing spurious
7591            errors or core dumping with matches on $@. I'm not going to
7592            spend time on what could be a fruitless search for the cause,
7593            so here's a bodge. If you're running 5.004 and don't like
7594            this inefficiency, either upgrade to a newer perl, or you are
7595            welcome to find the problem and send in a patch.
7596         */
7597         return newSV(0);
7598 #else
7599         return &PL_sv_undef;		/* Something went wrong, return undef */
7600 #endif
7601     }
7602 
7603     TRACEMED(("retrieve got %s(0x%" UVxf ")",
7604              sv_reftype(sv, FALSE), PTR2UV(sv)));
7605 
7606     /*
7607      * Backward compatibility with Storable-0.5@9 (which we know we
7608      * are retrieving if hseen is non-null): don't create an extra RV
7609      * for objects since we special-cased it at store time.
7610      *
7611      * Build a reference to the SV returned by pretrieve even if it is
7612      * already one and not a scalar, for consistency reasons.
7613      */
7614 
7615     if (pre_06_fmt) {			/* Was not handling overloading by then */
7616         SV *rv;
7617         TRACEMED(("fixing for old formats -- pre 0.6"));
7618         if (sv_type(aTHX_ sv) == svis_REF && (rv = SvRV(sv)) && SvOBJECT(rv)) {
7619             TRACEME(("ended do_retrieve() with an object -- pre 0.6"));
7620             return sv;
7621         }
7622     }
7623 
7624     /*
7625      * If reference is overloaded, restore behaviour.
7626      *
7627      * NB: minor glitch here: normally, overloaded refs are stored specially
7628      * so that we can croak when behaviour cannot be re-installed, and also
7629      * avoid testing for overloading magic at each reference retrieval.
7630      *
7631      * Unfortunately, the root reference is implicitly stored, so we must
7632      * check for possible overloading now.  Furthermore, if we don't restore
7633      * overloading, we cannot croak as if the original ref was, because we
7634      * have no way to determine whether it was an overloaded ref or not in
7635      * the first place.
7636      *
7637      * It's a pity that overloading magic is attached to the rv, and not to
7638      * the underlying sv as blessing is.
7639      */
7640 
7641     if (SvOBJECT(sv)) {
7642         HV *stash = (HV *) SvSTASH(sv);
7643         SV *rv = newRV_noinc(sv);
7644         if (stash && Gv_AMG(stash)) {
7645             SvAMAGIC_on(rv);
7646             TRACEMED(("restored overloading on root reference"));
7647         }
7648         TRACEMED(("ended do_retrieve() with an object"));
7649         return rv;
7650     }
7651 
7652     TRACEMED(("regular do_retrieve() end"));
7653 
7654     return newRV_noinc(sv);
7655 }
7656 
7657 /*
7658  * pretrieve
7659  *
7660  * Retrieve data held in file and return the root object, undef on error.
7661  */
pretrieve(pTHX_ PerlIO * f,IV flag)7662 static SV *pretrieve(pTHX_ PerlIO *f, IV flag)
7663 {
7664     TRACEMED(("pretrieve"));
7665     return do_retrieve(aTHX_ f, Nullsv, 0, (int)flag);
7666 }
7667 
7668 /*
7669  * mretrieve
7670  *
7671  * Retrieve data held in scalar and return the root object, undef on error.
7672  */
mretrieve(pTHX_ SV * sv,IV flag)7673 static SV *mretrieve(pTHX_ SV *sv, IV flag)
7674 {
7675     TRACEMED(("mretrieve"));
7676     return do_retrieve(aTHX_ (PerlIO*) 0, sv, 0, (int)flag);
7677 }
7678 
7679 /***
7680  *** Deep cloning
7681  ***/
7682 
7683 /*
7684  * dclone
7685  *
7686  * Deep clone: returns a fresh copy of the original referenced SV tree.
7687  *
7688  * This is achieved by storing the object in memory and restoring from
7689  * there. Not that efficient, but it should be faster than doing it from
7690  * pure perl anyway.
7691  */
dclone(pTHX_ SV * sv)7692 static SV *dclone(pTHX_ SV *sv)
7693 {
7694     dSTCXT;
7695     STRLEN size;
7696     stcxt_t *real_context;
7697     SV *out;
7698 
7699     TRACEMED(("dclone"));
7700 
7701     /*
7702      * Workaround for CROAK leak: if they enter with a "dirty" context,
7703      * free up memory for them now.
7704      */
7705 
7706     assert(cxt);
7707     if (cxt->s_dirty)
7708         clean_context(aTHX_ cxt);
7709 
7710     /*
7711      * Tied elements seem to need special handling.
7712      */
7713 
7714     if ((SvTYPE(sv) == SVt_PVLV
7715 #if PERL_VERSION < 8
7716          || SvTYPE(sv) == SVt_PVMG
7717 #endif
7718          ) && (SvFLAGS(sv) & (SVs_GMG|SVs_SMG|SVs_RMG)) ==
7719         (SVs_GMG|SVs_SMG|SVs_RMG) &&
7720         mg_find(sv, 'p')) {
7721         mg_get(sv);
7722     }
7723 
7724     /*
7725      * do_store() optimizes for dclone by not freeing its context, should
7726      * we need to allocate one because we're deep cloning from a hook.
7727      */
7728 
7729     if (!do_store(aTHX_ (PerlIO*) 0, sv, ST_CLONE, FALSE, (SV**) 0))
7730         return &PL_sv_undef;				/* Error during store */
7731 
7732     /*
7733      * Because of the above optimization, we have to refresh the context,
7734      * since a new one could have been allocated and stacked by do_store().
7735      */
7736 
7737     { dSTCXT; real_context = cxt; }		/* Sub-block needed for macro */
7738     cxt = real_context;					/* And we need this temporary... */
7739 
7740     /*
7741      * Now, 'cxt' may refer to a new context.
7742      */
7743 
7744     assert(cxt);
7745     ASSERT(!cxt->s_dirty, ("clean context"));
7746     ASSERT(!cxt->entry, ("entry will not cause new context allocation"));
7747 
7748     size = MBUF_SIZE();
7749     TRACEME(("dclone stored %ld bytes", (long)size));
7750     MBUF_INIT(size);
7751 
7752     /*
7753      * Since we're passing do_retrieve() both a NULL file and sv, we need
7754      * to pre-compute the taintedness of the input by setting cxt->tainted
7755      * to whatever state our own input string was.	-- RAM, 15/09/2000
7756      *
7757      * do_retrieve() will free non-root context.
7758      */
7759 
7760     cxt->s_tainted = SvTAINTED(sv);
7761     out = do_retrieve(aTHX_ (PerlIO*) 0, Nullsv, ST_CLONE, FLAG_BLESS_OK | FLAG_TIE_OK);
7762 
7763     TRACEMED(("dclone returns 0x%" UVxf, PTR2UV(out)));
7764 
7765     return out;
7766 }
7767 
7768 /***
7769  *** Glue with perl.
7770  ***/
7771 
7772 /*
7773  * The Perl IO GV object distinguishes between input and output for sockets
7774  * but not for plain files. To allow Storable to transparently work on
7775  * plain files and sockets transparently, we have to ask xsubpp to fetch the
7776  * right object for us. Hence the OutputStream and InputStream declarations.
7777  *
7778  * Before perl 5.004_05, those entries in the standard typemap are not
7779  * defined in perl include files, so we do that here.
7780  */
7781 
7782 #ifndef OutputStream
7783 #define OutputStream	PerlIO *
7784 #define InputStream	PerlIO *
7785 #endif	/* !OutputStream */
7786 
7787 static int
storable_free(pTHX_ SV * sv,MAGIC * mg)7788 storable_free(pTHX_ SV *sv, MAGIC* mg) {
7789     stcxt_t *cxt = (stcxt_t *)SvPVX(sv);
7790 
7791     PERL_UNUSED_ARG(mg);
7792 #ifdef USE_PTR_TABLE
7793     if (cxt->pseen)
7794         ptr_table_free(cxt->pseen);
7795 #endif
7796     if (kbuf)
7797         Safefree(kbuf);
7798     if (!cxt->membuf_ro && mbase)
7799         Safefree(mbase);
7800     if (cxt->membuf_ro && (cxt->msaved).arena)
7801         Safefree((cxt->msaved).arena);
7802     return 0;
7803 }
7804 
7805 MODULE = Storable	PACKAGE = Storable
7806 
7807 PROTOTYPES: ENABLE
7808 
7809 BOOT:
7810 {
7811     HV *stash = gv_stashpvn("Storable", 8, GV_ADD);
7812     newCONSTSUB(stash, "BIN_MAJOR", newSViv(STORABLE_BIN_MAJOR));
7813     newCONSTSUB(stash, "BIN_MINOR", newSViv(STORABLE_BIN_MINOR));
7814     newCONSTSUB(stash, "BIN_WRITE_MINOR", newSViv(STORABLE_BIN_WRITE_MINOR));
7815 
7816     newCONSTSUB(stash, "CAN_FLOCK", CAN_FLOCK);
7817 
7818     init_perinterp(aTHX);
7819     gv_fetchpv("Storable::drop_utf8",   GV_ADDMULTI, SVt_PV);
7820 #ifdef DEBUGME
7821     /* Only disable the used only once warning if we are in debugging mode.  */
7822     gv_fetchpv("Storable::DEBUGME",   GV_ADDMULTI, SVt_PV);
7823 #endif
7824 #ifdef USE_56_INTERWORK_KLUDGE
7825     gv_fetchpv("Storable::interwork_56_64bit",   GV_ADDMULTI, SVt_PV);
7826 #endif
7827     }
7828 
7829 void
7830 init_perinterp()
7831 CODE:
7832     init_perinterp(aTHX);
7833 
7834 # pstore
7835 #
7836 # Store the transitive data closure of given object to disk.
7837 # Returns undef on error, a true value otherwise.
7838 
7839 # net_pstore
7840 #
7841 # Same as pstore(), but network order is used for integers and doubles are
7842 # emitted as strings.
7843 
7844 SV *
7845 pstore(f,obj)
7846     OutputStream f
7847     SV*		obj
7848 ALIAS:
7849     net_pstore = 1
7850 PPCODE:
7851     RETVAL = do_store(aTHX_ f, obj, 0, ix, (SV **)0) ? &PL_sv_yes : &PL_sv_undef;
7852     /* do_store() can reallocate the stack, so need a sequence point to ensure
7853        that ST(0) knows about it. Hence using two statements.  */
7854     ST(0) = RETVAL;
7855     XSRETURN(1);
7856 
7857 # mstore
7858 #
7859 # Store the transitive data closure of given object to memory.
7860 # Returns undef on error, a scalar value containing the data otherwise.
7861 
7862 # net_mstore
7863 #
7864 # Same as mstore(), but network order is used for integers and doubles are
7865 # emitted as strings.
7866 
7867 SV *
7868 mstore(obj)
7869     SV*	obj
7870 ALIAS:
7871     net_mstore = 1
7872 CODE:
7873     RETVAL = &PL_sv_undef;
7874     if (!do_store(aTHX_ (PerlIO*) 0, obj, 0, ix, &RETVAL))
7875         RETVAL = &PL_sv_undef;
7876 OUTPUT:
7877     RETVAL
7878 
7879 SV *
7880 pretrieve(f, flag = 6)
7881     InputStream	f
7882     IV		flag
7883 CODE:
7884     RETVAL = pretrieve(aTHX_ f, flag);
7885 OUTPUT:
7886     RETVAL
7887 
7888 SV *
7889 mretrieve(sv, flag = 6)
7890     SV* sv
7891     IV	flag
7892 CODE:
7893     RETVAL = mretrieve(aTHX_ sv, flag);
7894 OUTPUT:
7895     RETVAL
7896 
7897 SV *
7898 dclone(sv)
7899     SV*	sv
7900 CODE:
7901     RETVAL = dclone(aTHX_ sv);
7902 OUTPUT:
7903     RETVAL
7904 
7905 void
7906 last_op_in_netorder()
7907 ALIAS:
7908     is_storing = ST_STORE
7909     is_retrieving = ST_RETRIEVE
7910 PREINIT:
7911     bool result;
7912 CODE:
7913     if (ix) {
7914         dSTCXT;
7915         assert(cxt);
7916         result = cxt->entry && (cxt->optype & ix) ? TRUE : FALSE;
7917     } else {
7918         result = !!last_op_in_netorder(aTHX);
7919     }
7920     ST(0) = boolSV(result);
7921 
7922 
7923 IV
7924 stack_depth()
7925 CODE:
7926     RETVAL = SvIV(get_sv("Storable::recursion_limit", GV_ADD));
7927 OUTPUT:
7928     RETVAL
7929 
7930 IV
7931 stack_depth_hash()
7932 CODE:
7933     RETVAL = SvIV(get_sv("Storable::recursion_limit_hash", GV_ADD));
7934 OUTPUT:
7935     RETVAL
7936