1 /* sv.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
10 */
11
12 /*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19 /*
20 *
21 *
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
28 */
29
30 #include "EXTERN.h"
31 #define PERL_IN_SV_C
32 #include "perl.h"
33 #include "regcomp.h"
34 #ifdef __VMS
35 # include <rms.h>
36 #endif
37
38 #ifdef __Lynx__
39 /* Missing proto on LynxOS */
40 char *gconvert(double, int, int, char *);
41 #endif
42
43 #ifdef USE_QUADMATH
44 # define SNPRINTF_G(nv, buffer, size, ndig) \
45 quadmath_snprintf(buffer, size, "%.*Qg", (int)ndig, (NV)(nv))
46 #else
47 # define SNPRINTF_G(nv, buffer, size, ndig) \
48 PERL_UNUSED_RESULT(Gconvert((NV)(nv), (int)ndig, 0, buffer))
49 #endif
50
51 #ifndef SV_COW_THRESHOLD
52 # define SV_COW_THRESHOLD 0 /* COW iff len > K */
53 #endif
54 #ifndef SV_COWBUF_THRESHOLD
55 # define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
56 #endif
57 #ifndef SV_COW_MAX_WASTE_THRESHOLD
58 # define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
59 #endif
60 #ifndef SV_COWBUF_WASTE_THRESHOLD
61 # define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
62 #endif
63 #ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
64 # define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
65 #endif
66 #ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
67 # define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
68 #endif
69 /* Work around compiler warnings about unsigned >= THRESHOLD when thres-
70 hold is 0. */
71 #if SV_COW_THRESHOLD
72 # define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
73 #else
74 # define GE_COW_THRESHOLD(cur) 1
75 #endif
76 #if SV_COWBUF_THRESHOLD
77 # define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
78 #else
79 # define GE_COWBUF_THRESHOLD(cur) 1
80 #endif
81 #if SV_COW_MAX_WASTE_THRESHOLD
82 # define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
83 #else
84 # define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
85 #endif
86 #if SV_COWBUF_WASTE_THRESHOLD
87 # define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
88 #else
89 # define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
90 #endif
91 #if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
92 # define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
93 #else
94 # define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
95 #endif
96 #if SV_COWBUF_WASTE_FACTOR_THRESHOLD
97 # define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
98 #else
99 # define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
100 #endif
101
102 #define CHECK_COW_THRESHOLD(cur,len) (\
103 GE_COW_THRESHOLD((cur)) && \
104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
106 )
107 #define CHECK_COWBUF_THRESHOLD(cur,len) (\
108 GE_COWBUF_THRESHOLD((cur)) && \
109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
111 )
112
113 #ifdef PERL_UTF8_CACHE_ASSERT
114 /* if adding more checks watch out for the following tests:
115 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
116 * lib/utf8.t lib/Unicode/Collate/t/index.t
117 * --jhi
118 */
119 # define ASSERT_UTF8_CACHE(cache) \
120 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
121 assert((cache)[2] <= (cache)[3]); \
122 assert((cache)[3] <= (cache)[1]);} \
123 } STMT_END
124 #else
125 # define ASSERT_UTF8_CACHE(cache) NOOP
126 #endif
127
128 static const char S_destroy[] = "DESTROY";
129 #define S_destroy_len (sizeof(S_destroy)-1)
130
131 /* ============================================================================
132
133 An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
134 sv, av, hv...) contains type and reference count information, and for
135 many types, a pointer to the body (struct xrv, xpv, xpviv...), which
136 contains fields specific to each type. Some types store all they need
137 in the head, so don't have a body.
138
139 In all but the most memory-paranoid configurations (ex: PURIFY), heads
140 and bodies are allocated out of arenas, which by default are
141 approximately 4K chunks of memory parcelled up into N heads or bodies.
142 Sv-bodies are allocated by their sv-type, guaranteeing size
143 consistency needed to allocate safely from arrays.
144
145 For SV-heads, the first slot in each arena is reserved, and holds a
146 link to the next arena, some flags, and a note of the number of slots.
147 Snaked through each arena chain is a linked list of free items; when
148 this becomes empty, an extra arena is allocated and divided up into N
149 items which are threaded into the free list.
150
151 SV-bodies are similar, but they use arena-sets by default, which
152 separate the link and info from the arena itself, and reclaim the 1st
153 slot in the arena. SV-bodies are further described later.
154
155 The following global variables are associated with arenas:
156
157 PL_sv_arenaroot pointer to list of SV arenas
158 PL_sv_root pointer to list of free SV structures
159
160 PL_body_arenas head of linked-list of body arenas
161 PL_body_roots[] array of pointers to list of free bodies of svtype
162 arrays are indexed by the svtype needed
163
164 A few special SV heads are not allocated from an arena, but are
165 instead directly created in the interpreter structure, eg PL_sv_undef.
166 The size of arenas can be changed from the default by setting
167 PERL_ARENA_SIZE appropriately at compile time.
168
169 The SV arena serves the secondary purpose of allowing still-live SVs
170 to be located and destroyed during final cleanup.
171
172 At the lowest level, the macros new_SV() and del_SV() grab and free
173 an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
174 to return the SV to the free list with error checking.) new_SV() calls
175 more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
176 SVs in the free list have their SvTYPE field set to all ones.
177
178 At the time of very final cleanup, sv_free_arenas() is called from
179 perl_destruct() to physically free all the arenas allocated since the
180 start of the interpreter.
181
182 The internal function visit() scans the SV arenas list, and calls a specified
183 function for each SV it finds which is still live, I<i.e.> which has an SvTYPE
184 other than all 1's, and a non-zero SvREFCNT. visit() is used by the
185 following functions (specified as [function that calls visit()] / [function
186 called by visit() for each SV]):
187
188 sv_report_used() / do_report_used()
189 dump all remaining SVs (debugging aid)
190
191 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
192 do_clean_named_io_objs(),do_curse()
193 Attempt to free all objects pointed to by RVs,
194 try to do the same for all objects indir-
195 ectly referenced by typeglobs too, and
196 then do a final sweep, cursing any
197 objects that remain. Called once from
198 perl_destruct(), prior to calling sv_clean_all()
199 below.
200
201 sv_clean_all() / do_clean_all()
202 SvREFCNT_dec(sv) each remaining SV, possibly
203 triggering an sv_free(). It also sets the
204 SVf_BREAK flag on the SV to indicate that the
205 refcnt has been artificially lowered, and thus
206 stopping sv_free() from giving spurious warnings
207 about SVs which unexpectedly have a refcnt
208 of zero. called repeatedly from perl_destruct()
209 until there are no SVs left.
210
211 =head2 Arena allocator API Summary
212
213 Private API to rest of sv.c
214
215 new_SV(), del_SV(),
216
217 new_XPVNV(), del_body()
218 etc
219
220 Public API:
221
222 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
223
224 =cut
225
226 * ========================================================================= */
227
228 /*
229 * "A time to plant, and a time to uproot what was planted..."
230 */
231
232 #ifdef DEBUG_LEAKING_SCALARS
233 # define FREE_SV_DEBUG_FILE(sv) STMT_START { \
234 if ((sv)->sv_debug_file) { \
235 PerlMemShared_free((sv)->sv_debug_file); \
236 sv->sv_debug_file = NULL; \
237 } \
238 } STMT_END
239 # define DEBUG_SV_SERIAL(sv) \
240 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) del_SV\n", \
241 PTR2UV(sv), (long)(sv)->sv_debug_serial))
242 #else
243 # define FREE_SV_DEBUG_FILE(sv)
244 # define DEBUG_SV_SERIAL(sv) NOOP
245 #endif
246
247 /* Mark an SV head as unused, and add to free list.
248 *
249 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
250 * its refcount artificially decremented during global destruction, so
251 * there may be dangling pointers to it. The last thing we want in that
252 * case is for it to be reused. */
253
254 #define plant_SV(p) \
255 STMT_START { \
256 const U32 old_flags = SvFLAGS(p); \
257 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
258 DEBUG_SV_SERIAL(p); \
259 FREE_SV_DEBUG_FILE(p); \
260 POISON_SV_HEAD(p); \
261 SvFLAGS(p) = SVTYPEMASK; \
262 if (!(old_flags & SVf_BREAK)) { \
263 SvARENA_CHAIN_SET(p, PL_sv_root); \
264 PL_sv_root = (p); \
265 } \
266 --PL_sv_count; \
267 } STMT_END
268
269
270 /* make some more SVs by adding another arena */
271
272 SV*
Perl_more_sv(pTHX)273 Perl_more_sv(pTHX)
274 {
275 SV* sv;
276 char *chunk; /* must use New here to match call to */
277 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
278 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
279 uproot_SV(sv);
280 return sv;
281 }
282
283 /* del_SV(): return an empty SV head to the free list */
284
285 #ifdef DEBUGGING
286
287 #define del_SV(p) \
288 STMT_START { \
289 if (DEBUG_D_TEST) \
290 del_sv(p); \
291 else \
292 plant_SV(p); \
293 } STMT_END
294
295 STATIC void
S_del_sv(pTHX_ SV * p)296 S_del_sv(pTHX_ SV *p)
297 {
298 PERL_ARGS_ASSERT_DEL_SV;
299
300 if (DEBUG_D_TEST) {
301 SV* sva;
302 bool ok = 0;
303 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
304 const SV * const sv = sva + 1;
305 const SV * const svend = &sva[SvREFCNT(sva)];
306 if (p >= sv && p < svend) {
307 ok = 1;
308 break;
309 }
310 }
311 if (!ok) {
312 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
313 "Attempt to free non-arena SV: 0x%" UVxf
314 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
315 return;
316 }
317 }
318 plant_SV(p);
319 }
320
321 #else /* ! DEBUGGING */
322
323 #define del_SV(p) plant_SV(p)
324
325 #endif /* DEBUGGING */
326
327
328 /*
329 =for apidoc_section $SV
330
331 =for apidoc sv_add_arena
332
333 Given a chunk of memory, link it to the head of the list of arenas,
334 and split it into a list of free SVs.
335
336 =cut
337 */
338
339 static void
S_sv_add_arena(pTHX_ char * const ptr,const U32 size,const U32 flags)340 S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
341 {
342 SV *const sva = MUTABLE_SV(ptr);
343 SV* sv;
344 SV* svend;
345
346 PERL_ARGS_ASSERT_SV_ADD_ARENA;
347
348 /* The first SV in an arena isn't an SV. */
349 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
350 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
351 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
352
353 PL_sv_arenaroot = sva;
354 PL_sv_root = sva + 1;
355
356 svend = &sva[SvREFCNT(sva) - 1];
357 sv = sva + 1;
358 while (sv < svend) {
359 SvARENA_CHAIN_SET(sv, (sv + 1));
360 #ifdef DEBUGGING
361 SvREFCNT(sv) = 0;
362 #endif
363 /* Must always set typemask because it's always checked in on cleanup
364 when the arenas are walked looking for objects. */
365 SvFLAGS(sv) = SVTYPEMASK;
366 sv++;
367 }
368 SvARENA_CHAIN_SET(sv, 0);
369 #ifdef DEBUGGING
370 SvREFCNT(sv) = 0;
371 #endif
372 SvFLAGS(sv) = SVTYPEMASK;
373 }
374
375 /* visit(): call the named function for each non-free SV in the arenas
376 * whose flags field matches the flags/mask args. */
377
378 STATIC SSize_t
S_visit(pTHX_ SVFUNC_t f,const U32 flags,const U32 mask)379 S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
380 {
381 SV* sva;
382 I32 visited = 0;
383
384 PERL_ARGS_ASSERT_VISIT;
385
386 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
387 const SV * const svend = &sva[SvREFCNT(sva)];
388 SV* sv;
389 for (sv = sva + 1; sv < svend; ++sv) {
390 if (!SvIS_FREED(sv)
391 && (sv->sv_flags & mask) == flags
392 && SvREFCNT(sv))
393 {
394 (*f)(aTHX_ sv);
395 ++visited;
396 }
397 }
398 }
399 return visited;
400 }
401
402 #ifdef DEBUGGING
403
404 /* called by sv_report_used() for each live SV */
405
406 static void
do_report_used(pTHX_ SV * const sv)407 do_report_used(pTHX_ SV *const sv)
408 {
409 if (!SvIS_FREED(sv)) {
410 PerlIO_printf(Perl_debug_log, "****\n");
411 sv_dump(sv);
412 }
413 }
414 #endif
415
416 /*
417 =for apidoc sv_report_used
418
419 Dump the contents of all SVs not yet freed (debugging aid).
420
421 =cut
422 */
423
424 void
Perl_sv_report_used(pTHX)425 Perl_sv_report_used(pTHX)
426 {
427 #ifdef DEBUGGING
428 visit(do_report_used, 0, 0);
429 #else
430 PERL_UNUSED_CONTEXT;
431 #endif
432 }
433
434 /* called by sv_clean_objs() for each live SV */
435
436 static void
do_clean_objs(pTHX_ SV * const ref)437 do_clean_objs(pTHX_ SV *const ref)
438 {
439 assert (SvROK(ref));
440 {
441 SV * const target = SvRV(ref);
442 if (SvOBJECT(target)) {
443 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
444 if (SvWEAKREF(ref)) {
445 sv_del_backref(target, ref);
446 SvWEAKREF_off(ref);
447 SvRV_set(ref, NULL);
448 } else {
449 SvROK_off(ref);
450 SvRV_set(ref, NULL);
451 SvREFCNT_dec_NN(target);
452 }
453 }
454 }
455 }
456
457
458 /* clear any slots in a GV which hold objects - except IO;
459 * called by sv_clean_objs() for each live GV */
460
461 static void
do_clean_named_objs(pTHX_ SV * const sv)462 do_clean_named_objs(pTHX_ SV *const sv)
463 {
464 SV *obj;
465 assert(SvTYPE(sv) == SVt_PVGV);
466 assert(isGV_with_GP(sv));
467 if (!GvGP(sv))
468 return;
469
470 /* freeing GP entries may indirectly free the current GV;
471 * hold onto it while we mess with the GP slots */
472 SvREFCNT_inc(sv);
473
474 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
475 DEBUG_D((PerlIO_printf(Perl_debug_log,
476 "Cleaning named glob SV object:\n "), sv_dump(obj)));
477 GvSV(sv) = NULL;
478 SvREFCNT_dec_NN(obj);
479 }
480 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
481 DEBUG_D((PerlIO_printf(Perl_debug_log,
482 "Cleaning named glob AV object:\n "), sv_dump(obj)));
483 GvAV(sv) = NULL;
484 SvREFCNT_dec_NN(obj);
485 }
486 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
487 DEBUG_D((PerlIO_printf(Perl_debug_log,
488 "Cleaning named glob HV object:\n "), sv_dump(obj)));
489 GvHV(sv) = NULL;
490 SvREFCNT_dec_NN(obj);
491 }
492 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
493 DEBUG_D((PerlIO_printf(Perl_debug_log,
494 "Cleaning named glob CV object:\n "), sv_dump(obj)));
495 GvCV_set(sv, NULL);
496 SvREFCNT_dec_NN(obj);
497 }
498 SvREFCNT_dec_NN(sv); /* undo the inc above */
499 }
500
501 /* clear any IO slots in a GV which hold objects (except stderr, defout);
502 * called by sv_clean_objs() for each live GV */
503
504 static void
do_clean_named_io_objs(pTHX_ SV * const sv)505 do_clean_named_io_objs(pTHX_ SV *const sv)
506 {
507 SV *obj;
508 assert(SvTYPE(sv) == SVt_PVGV);
509 assert(isGV_with_GP(sv));
510 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
511 return;
512
513 SvREFCNT_inc(sv);
514 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
515 DEBUG_D((PerlIO_printf(Perl_debug_log,
516 "Cleaning named glob IO object:\n "), sv_dump(obj)));
517 GvIOp(sv) = NULL;
518 SvREFCNT_dec_NN(obj);
519 }
520 SvREFCNT_dec_NN(sv); /* undo the inc above */
521 }
522
523 /* Void wrapper to pass to visit() */
524 static void
do_curse(pTHX_ SV * const sv)525 do_curse(pTHX_ SV * const sv) {
526 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv)
527 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv))
528 return;
529 (void)curse(sv, 0);
530 }
531
532 /*
533 =for apidoc sv_clean_objs
534
535 Attempt to destroy all objects not yet freed.
536
537 =cut
538 */
539
540 void
Perl_sv_clean_objs(pTHX)541 Perl_sv_clean_objs(pTHX)
542 {
543 GV *olddef, *olderr;
544 PL_in_clean_objs = TRUE;
545 visit(do_clean_objs, SVf_ROK, SVf_ROK);
546 /* Some barnacles may yet remain, clinging to typeglobs.
547 * Run the non-IO destructors first: they may want to output
548 * error messages, close files etc */
549 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
550 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
551 /* And if there are some very tenacious barnacles clinging to arrays,
552 closures, or what have you.... */
553 visit(do_curse, SVs_OBJECT, SVs_OBJECT);
554 olddef = PL_defoutgv;
555 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
556 if (olddef && isGV_with_GP(olddef))
557 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
558 olderr = PL_stderrgv;
559 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
560 if (olderr && isGV_with_GP(olderr))
561 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
562 SvREFCNT_dec(olddef);
563 PL_in_clean_objs = FALSE;
564 }
565
566 /* called by sv_clean_all() for each live SV */
567
568 static void
do_clean_all(pTHX_ SV * const sv)569 do_clean_all(pTHX_ SV *const sv)
570 {
571 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
572 /* don't clean pid table and strtab */
573 return;
574 }
575 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%" UVxf "\n", PTR2UV(sv)) ));
576 SvFLAGS(sv) |= SVf_BREAK;
577 SvREFCNT_dec_NN(sv);
578 }
579
580 /*
581 =for apidoc sv_clean_all
582
583 Decrement the refcnt of each remaining SV, possibly triggering a
584 cleanup. This function may have to be called multiple times to free
585 SVs which are in complex self-referential hierarchies.
586
587 =cut
588 */
589
590 SSize_t
Perl_sv_clean_all(pTHX)591 Perl_sv_clean_all(pTHX)
592 {
593 SSize_t cleaned;
594 PL_in_clean_all = TRUE;
595 cleaned = visit(do_clean_all, 0,0);
596 return cleaned;
597 }
598
599
600 #ifdef DEBUGGING
601
602 /* Called by sv_mark_arenas() for each live SV: set SVf_BREAK */
603
604 static void
S_do_sv_mark_arenas(pTHX_ SV * const sv)605 S_do_sv_mark_arenas(pTHX_ SV *const sv)
606 {
607 sv->sv_flags |= SVf_BREAK;
608 }
609
610 /* sv_mark_arenas(): for leak debugging: mark all live SVs with SVf_BREAK.
611 * Then later, use sv_sweep_arenas() to list any SVs not so marked.
612 */
613
614 void
Perl_sv_mark_arenas(pTHX)615 Perl_sv_mark_arenas(pTHX)
616 {
617 visit(S_do_sv_mark_arenas, 0, 0);
618 }
619
620 /* Called by sv_sweep_arenas() for each live SV, to list any SVs without
621 * SVf_BREAK set */
622
623 static void
S_do_sv_sweep_arenas(pTHX_ SV * const sv)624 S_do_sv_sweep_arenas(pTHX_ SV *const sv)
625 {
626 if (sv->sv_flags & SVf_BREAK) {
627 sv->sv_flags &= ~SVf_BREAK;
628 return;
629 }
630 PerlIO_printf(Perl_debug_log, "Unmarked SV: 0x%p: %s\n",
631 sv, SvPEEK(sv));
632 }
633
634
635 /* sv_sweep_arenas(): for debugging: list all live SVs that don't have
636 * SVf_BREAK set, then turn off all SVf_BREAK flags. Typically used some
637 * time after sv_mark_arenas(), to find SVs which have been created since
638 * the marking but not yet freed (they may have leaked, or been stored in
639 * an array, or whatever).
640 */
641
642 void
Perl_sv_sweep_arenas(pTHX)643 Perl_sv_sweep_arenas(pTHX)
644 {
645 visit(S_do_sv_sweep_arenas, 0, 0);
646 }
647
648 #endif
649
650
651 /*
652 ARENASETS: a meta-arena implementation which separates arena-info
653 into struct arena_set, which contains an array of struct
654 arena_descs, each holding info for a single arena. By separating
655 the meta-info from the arena, we recover the 1st slot, formerly
656 borrowed for list management. The arena_set is about the size of an
657 arena, avoiding the needless malloc overhead of a naive linked-list.
658
659 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
660 memory in the last arena-set (1/2 on average). In trade, we get
661 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
662 smaller types). The recovery of the wasted space allows use of
663 small arenas for large, rare body types, by changing array* fields
664 in body_details_by_type[] below.
665 */
666 struct arena_desc {
667 char *arena; /* the raw storage, allocated aligned */
668 size_t size; /* its size ~4k typ */
669 svtype utype; /* bodytype stored in arena */
670 };
671
672 struct arena_set;
673
674 /* Get the maximum number of elements in set[] such that struct arena_set
675 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
676 therefore likely to be 1 aligned memory page. */
677
678 #define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
679 - 2 * sizeof(int)) / sizeof (struct arena_desc))
680
681 struct arena_set {
682 struct arena_set* next;
683 unsigned int set_size; /* ie ARENAS_PER_SET */
684 unsigned int curr; /* index of next available arena-desc */
685 struct arena_desc set[ARENAS_PER_SET];
686 };
687
688 /*
689 =for apidoc sv_free_arenas
690
691 Deallocate the memory used by all arenas. Note that all the individual SV
692 heads and bodies within the arenas must already have been freed.
693
694 =cut
695
696 */
697 void
Perl_sv_free_arenas(pTHX)698 Perl_sv_free_arenas(pTHX)
699 {
700 SV* sva;
701 SV* svanext;
702 unsigned int i;
703
704 /* Free arenas here, but be careful about fake ones. (We assume
705 contiguity of the fake ones with the corresponding real ones.) */
706
707 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
708 svanext = MUTABLE_SV(SvANY(sva));
709 while (svanext && SvFAKE(svanext))
710 svanext = MUTABLE_SV(SvANY(svanext));
711
712 if (!SvFAKE(sva))
713 Safefree(sva);
714 }
715
716 {
717 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
718
719 while (aroot) {
720 struct arena_set *current = aroot;
721 i = aroot->curr;
722 while (i--) {
723 assert(aroot->set[i].arena);
724 Safefree(aroot->set[i].arena);
725 }
726 aroot = aroot->next;
727 Safefree(current);
728 }
729 }
730 PL_body_arenas = 0;
731
732 i = PERL_ARENA_ROOTS_SIZE;
733 while (i--)
734 PL_body_roots[i] = 0;
735
736 PL_sv_arenaroot = 0;
737 PL_sv_root = 0;
738 }
739
740 /*
741 Historically, here were mid-level routines that manage the
742 allocation of bodies out of the various arenas. Some of these
743 routines and related definitions remain here, but others were
744 moved into sv_inline.h to facilitate inlining of newSV_type().
745
746 There are 4 kinds of arenas:
747
748 1. SV-head arenas, which are discussed and handled above
749 2. regular body arenas
750 3. arenas for reduced-size bodies
751 4. Hash-Entry arenas
752
753 Arena types 2 & 3 are chained by body-type off an array of
754 arena-root pointers, which is indexed by svtype. Some of the
755 larger/less used body types are malloced singly, since a large
756 unused block of them is wasteful. Also, several svtypes don't have
757 bodies; the data fits into the sv-head itself. The arena-root
758 pointer thus has a few unused root-pointers (which may be hijacked
759 later for arena type 4)
760
761 3 differs from 2 as an optimization; some body types have several
762 unused fields in the front of the structure (which are kept in-place
763 for consistency). These bodies can be allocated in smaller chunks,
764 because the leading fields arent accessed. Pointers to such bodies
765 are decremented to point at the unused 'ghost' memory, knowing that
766 the pointers are used with offsets to the real memory.
767
768 Allocation of SV-bodies is similar to SV-heads, differing as follows;
769 the allocation mechanism is used for many body types, so is somewhat
770 more complicated, it uses arena-sets, and has no need for still-live
771 SV detection.
772
773 At the outermost level, (new|del)_X*V macros return bodies of the
774 appropriate type. These macros call either (new|del)_body_type or
775 (new|del)_body_allocated macro pairs, depending on specifics of the
776 type. Most body types use the former pair, the latter pair is used to
777 allocate body types with "ghost fields".
778
779 "ghost fields" are fields that are unused in certain types, and
780 consequently don't need to actually exist. They are declared because
781 they're part of a "base type", which allows use of functions as
782 methods. The simplest examples are AVs and HVs, 2 aggregate types
783 which don't use the fields which support SCALAR semantics.
784
785 For these types, the arenas are carved up into appropriately sized
786 chunks, we thus avoid wasted memory for those unaccessed members.
787 When bodies are allocated, we adjust the pointer back in memory by the
788 size of the part not allocated, so it's as if we allocated the full
789 structure. (But things will all go boom if you write to the part that
790 is "not there", because you'll be overwriting the last members of the
791 preceding structure in memory.)
792
793 We calculate the correction using the STRUCT_OFFSET macro on the first
794 member present. If the allocated structure is smaller (no initial NV
795 actually allocated) then the net effect is to subtract the size of the NV
796 from the pointer, to return a new pointer as if an initial NV were actually
797 allocated. (We were using structures named *_allocated for this, but
798 this turned out to be a subtle bug, because a structure without an NV
799 could have a lower alignment constraint, but the compiler is allowed to
800 optimised accesses based on the alignment constraint of the actual pointer
801 to the full structure, for example, using a single 64 bit load instruction
802 because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
803
804 This is the same trick as was used for NV and IV bodies. Ironically it
805 doesn't need to be used for NV bodies any more, because NV is now at
806 the start of the structure. IV bodies, and also in some builds NV bodies,
807 don't need it either, because they are no longer allocated.
808
809 In turn, the new_body_* allocators call S_new_body(), which invokes
810 new_body_from_arena macro, which takes a lock, and takes a body off the
811 linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
812 necessary to refresh an empty list. Then the lock is released, and
813 the body is returned.
814
815 Perl_more_bodies allocates a new arena, and carves it up into an array of N
816 bodies, which it strings into a linked list. It looks up arena-size
817 and body-size from the body_details table described below, thus
818 supporting the multiple body-types.
819
820 If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
821 the (new|del)_X*V macros are mapped directly to malloc/free.
822
823 For each sv-type, struct body_details bodies_by_type[] carries
824 parameters which control these aspects of SV handling:
825
826 Arena_size determines whether arenas are used for this body type, and if
827 so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
828 zero, forcing individual mallocs and frees.
829
830 Body_size determines how big a body is, and therefore how many fit into
831 each arena. Offset carries the body-pointer adjustment needed for
832 "ghost fields", and is used in *_allocated macros.
833
834 But its main purpose is to parameterize info needed in
835 Perl_sv_upgrade(). The info here dramatically simplifies the function
836 vs the implementation in 5.8.8, making it table-driven. All fields
837 are used for this, except for arena_size.
838
839 For the sv-types that have no bodies, arenas are not used, so those
840 PL_body_roots[sv_type] are unused, and can be overloaded. In
841 something of a special case, SVt_NULL is borrowed for HE arenas;
842 PL_body_roots[HE_ARENA_ROOT_IX=SVt_NULL] is filled by S_more_he, but the
843 bodies_by_type[SVt_NULL] slot is not used, as the table is not
844 available in hv.c. Similarly SVt_IV is re-used for HVAUX_ARENA_ROOT_IX.
845
846 */
847
848 /* return a thing to the free list */
849
850 #define del_body(thing, root) \
851 STMT_START { \
852 void ** const thing_copy = (void **)thing; \
853 *thing_copy = *root; \
854 *root = (void*)thing_copy; \
855 } STMT_END
856
857
858 void *
Perl_more_bodies(pTHX_ const svtype sv_type,const size_t body_size,const size_t arena_size)859 Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
860 const size_t arena_size)
861 {
862 void ** const root = &PL_body_roots[sv_type];
863 struct arena_desc *adesc;
864 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
865 unsigned int curr;
866 char *start;
867 const char *end;
868 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
869 #if defined(DEBUGGING)
870 static bool done_sanity_check;
871
872 if (!done_sanity_check) {
873 unsigned int i = SVt_LAST;
874
875 done_sanity_check = TRUE;
876
877 while (i--)
878 assert (bodies_by_type[i].type == i);
879 }
880 #endif
881
882 assert(arena_size);
883
884 /* may need new arena-set to hold new arena */
885 if (!aroot || aroot->curr >= aroot->set_size) {
886 struct arena_set *newroot;
887 Newxz(newroot, 1, struct arena_set);
888 newroot->set_size = ARENAS_PER_SET;
889 newroot->next = aroot;
890 aroot = newroot;
891 PL_body_arenas = (void *) newroot;
892 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
893 }
894
895 /* ok, now have arena-set with at least 1 empty/available arena-desc */
896 curr = aroot->curr++;
897 adesc = &(aroot->set[curr]);
898 assert(!adesc->arena);
899
900 Newx(adesc->arena, good_arena_size, char);
901 adesc->size = good_arena_size;
902 adesc->utype = sv_type;
903 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %" UVuf "\n",
904 curr, (void*)adesc->arena, (UV)good_arena_size));
905
906 start = (char *) adesc->arena;
907
908 /* Get the address of the byte after the end of the last body we can fit.
909 Remember, this is integer division: */
910 end = start + good_arena_size / body_size * body_size;
911
912 /* computed count doesn't reflect the 1st slot reservation */
913 #if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
914 DEBUG_m(PerlIO_printf(Perl_debug_log,
915 "arena %p end %p arena-size %d (from %d) type %d "
916 "size %d ct %d\n",
917 (void*)start, (void*)end, (int)good_arena_size,
918 (int)arena_size, sv_type, (int)body_size,
919 (int)good_arena_size / (int)body_size));
920 #else
921 DEBUG_m(PerlIO_printf(Perl_debug_log,
922 "arena %p end %p arena-size %d type %d size %d ct %d\n",
923 (void*)start, (void*)end,
924 (int)arena_size, sv_type, (int)body_size,
925 (int)good_arena_size / (int)body_size));
926 #endif
927 *root = (void *)start;
928
929 while (1) {
930 /* Where the next body would start: */
931 char * const next = start + body_size;
932
933 if (next >= end) {
934 /* This is the last body: */
935 assert(next == end);
936
937 *(void **)start = 0;
938 return *root;
939 }
940
941 *(void**) start = (void *)next;
942 start = next;
943 }
944 }
945
946 /*
947 =for apidoc sv_upgrade
948
949 Upgrade an SV to a more complex form. Generally adds a new body type to the
950 SV, then copies across as much information as possible from the old body.
951 It croaks if the SV is already in a more complex form than requested. You
952 generally want to use the C<SvUPGRADE> macro wrapper, which checks the type
953 before calling C<sv_upgrade>, and hence does not croak. See also
954 C<L</svtype>>.
955
956 =cut
957 */
958
959 void
Perl_sv_upgrade(pTHX_ SV * const sv,svtype new_type)960 Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
961 {
962 void* old_body;
963 void* new_body;
964 const svtype old_type = SvTYPE(sv);
965 const struct body_details *new_type_details;
966 const struct body_details *old_type_details
967 = bodies_by_type + old_type;
968 SV *referent = NULL;
969
970 PERL_ARGS_ASSERT_SV_UPGRADE;
971
972 if (old_type == new_type)
973 return;
974
975 /* This clause was purposefully added ahead of the early return above to
976 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
977 inference by Nick I-S that it would fix other troublesome cases. See
978 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
979
980 Given that shared hash key scalars are no longer PVIV, but PV, there is
981 no longer need to unshare so as to free up the IVX slot for its proper
982 purpose. So it's safe to move the early return earlier. */
983
984 if (new_type > SVt_PVMG && SvIsCOW(sv)) {
985 sv_force_normal_flags(sv, 0);
986 }
987
988 old_body = SvANY(sv);
989
990 /* Copying structures onto other structures that have been neatly zeroed
991 has a subtle gotcha. Consider XPVMG
992
993 +------+------+------+------+------+-------+-------+
994 | NV | CUR | LEN | IV | MAGIC | STASH |
995 +------+------+------+------+------+-------+-------+
996 0 4 8 12 16 20 24 28
997
998 where NVs are aligned to 8 bytes, so that sizeof that structure is
999 actually 32 bytes long, with 4 bytes of padding at the end:
1000
1001 +------+------+------+------+------+-------+-------+------+
1002 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1003 +------+------+------+------+------+-------+-------+------+
1004 0 4 8 12 16 20 24 28 32
1005
1006 so what happens if you allocate memory for this structure:
1007
1008 +------+------+------+------+------+-------+-------+------+------+...
1009 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1010 +------+------+------+------+------+-------+-------+------+------+...
1011 0 4 8 12 16 20 24 28 32 36
1012
1013 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1014 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1015 started out as zero once, but it's quite possible that it isn't. So now,
1016 rather than a nicely zeroed GP, you have it pointing somewhere random.
1017 Bugs ensue.
1018
1019 (In fact, GP ends up pointing at a previous GP structure, because the
1020 principle cause of the padding in XPVMG getting garbage is a copy of
1021 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1022 this happens to be moot because XPVGV has been re-ordered, with GP
1023 no longer after STASH)
1024
1025 So we are careful and work out the size of used parts of all the
1026 structures. */
1027
1028 switch (old_type) {
1029 case SVt_NULL:
1030 break;
1031 case SVt_IV:
1032 if (SvROK(sv)) {
1033 referent = SvRV(sv);
1034 old_type_details = &fake_rv;
1035 if (new_type == SVt_NV)
1036 new_type = SVt_PVNV;
1037 } else {
1038 if (new_type < SVt_PVIV) {
1039 new_type = (new_type == SVt_NV)
1040 ? SVt_PVNV : SVt_PVIV;
1041 }
1042 }
1043 break;
1044 case SVt_NV:
1045 if (new_type < SVt_PVNV) {
1046 new_type = SVt_PVNV;
1047 }
1048 break;
1049 case SVt_PV:
1050 assert(new_type > SVt_PV);
1051 STATIC_ASSERT_STMT(SVt_IV < SVt_PV);
1052 STATIC_ASSERT_STMT(SVt_NV < SVt_PV);
1053 break;
1054 case SVt_PVIV:
1055 break;
1056 case SVt_PVNV:
1057 break;
1058 case SVt_PVMG:
1059 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1060 there's no way that it can be safely upgraded, because perl.c
1061 expects to Safefree(SvANY(PL_mess_sv)) */
1062 assert(sv != PL_mess_sv);
1063 break;
1064 default:
1065 if (UNLIKELY(old_type_details->cant_upgrade))
1066 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1067 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
1068 }
1069
1070 if (UNLIKELY(old_type > new_type))
1071 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1072 (int)old_type, (int)new_type);
1073
1074 new_type_details = bodies_by_type + new_type;
1075
1076 SvFLAGS(sv) &= ~SVTYPEMASK;
1077 SvFLAGS(sv) |= new_type;
1078
1079 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1080 the return statements above will have triggered. */
1081 assert (new_type != SVt_NULL);
1082 switch (new_type) {
1083 case SVt_IV:
1084 assert(old_type == SVt_NULL);
1085 SET_SVANY_FOR_BODYLESS_IV(sv);
1086 SvIV_set(sv, 0);
1087 return;
1088 case SVt_NV:
1089 assert(old_type == SVt_NULL);
1090 #if NVSIZE <= IVSIZE
1091 SET_SVANY_FOR_BODYLESS_NV(sv);
1092 #else
1093 SvANY(sv) = new_XNV();
1094 #endif
1095 SvNV_set(sv, 0);
1096 return;
1097 case SVt_PVHV:
1098 case SVt_PVAV:
1099 case SVt_PVOBJ:
1100 assert(new_type_details->body_size);
1101
1102 #ifndef PURIFY
1103 assert(new_type_details->arena);
1104 assert(new_type_details->arena_size);
1105 /* This points to the start of the allocated area. */
1106 new_body = S_new_body(aTHX_ new_type);
1107 /* xpvav and xpvhv have no offset, so no need to adjust new_body */
1108 assert(!(new_type_details->offset));
1109 #else
1110 /* We always allocated the full length item with PURIFY. To do this
1111 we fake things so that arena is false for all 16 types.. */
1112 new_body = new_NOARENAZ(new_type_details);
1113 #endif
1114 SvANY(sv) = new_body;
1115 switch(new_type) {
1116 case SVt_PVAV:
1117 {
1118 XPVAV pvav = {
1119 .xmg_stash = NULL,
1120 .xmg_u = {.xmg_magic = NULL},
1121 .xav_fill = -1, .xav_max = -1, .xav_alloc = 0
1122 };
1123 *((XPVAV*) SvANY(sv)) = pvav;
1124 }
1125
1126 AvREAL_only(sv);
1127 break;
1128 case SVt_PVHV:
1129 {
1130 XPVHV pvhv = {
1131 .xmg_stash = NULL,
1132 .xmg_u = {.xmg_magic = NULL},
1133 .xhv_keys = 0,
1134 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */
1135 .xhv_max = PERL_HASH_DEFAULT_HvMAX
1136 };
1137 *((XPVHV*) SvANY(sv)) = pvhv;
1138 }
1139
1140 assert(!SvOK(sv));
1141 SvOK_off(sv);
1142 #ifndef NODEFAULT_SHAREKEYS
1143 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1144 #endif
1145 break;
1146 case SVt_PVOBJ:
1147 {
1148 XPVOBJ pvo = {
1149 .xmg_stash = NULL, .xmg_u = {.xmg_magic = NULL},
1150 .xobject_maxfield = -1,
1151 .xobject_iter_sv_at = 0,
1152 .xobject_fields = NULL,
1153 };
1154 *((XPVOBJ*) SvANY(sv)) = pvo;
1155 }
1156 break;
1157 default:
1158 NOT_REACHED;
1159 }
1160
1161 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1162 The target created by newSVrv also is, and it can have magic.
1163 However, it never has SvPVX set.
1164 */
1165 if (old_type == SVt_IV) {
1166 assert(!SvROK(sv));
1167 } else if (old_type >= SVt_PV) {
1168 assert(SvPVX_const(sv) == 0);
1169 }
1170
1171 if (old_type >= SVt_PVMG) {
1172 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
1173 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
1174 } else {
1175 sv->sv_u.svu_array = NULL; /* or svu_hash */
1176 }
1177 break;
1178
1179 case SVt_PVIV:
1180 /* XXX Is this still needed? Was it ever needed? Surely as there is
1181 no route from NV to PVIV, NOK can never be true */
1182 assert(!SvNOKp(sv));
1183 assert(!SvNOK(sv));
1184 /* FALLTHROUGH */
1185 case SVt_PVIO:
1186 case SVt_PVFM:
1187 case SVt_PVGV:
1188 case SVt_PVCV:
1189 case SVt_PVLV:
1190 case SVt_INVLIST:
1191 case SVt_REGEXP:
1192 case SVt_PVMG:
1193 case SVt_PVNV:
1194 case SVt_PV:
1195
1196 assert(new_type_details->body_size);
1197 /* We always allocated the full length item with PURIFY. To do this
1198 we fake things so that arena is false for all 16 types.. */
1199 #ifndef PURIFY
1200 if(new_type_details->arena) {
1201 /* This points to the start of the allocated area. */
1202 new_body = S_new_body(aTHX_ new_type);
1203 Zero(new_body, new_type_details->body_size, char);
1204 new_body = ((char *)new_body) - new_type_details->offset;
1205 } else
1206 #endif
1207 {
1208 new_body = new_NOARENAZ(new_type_details);
1209 }
1210 SvANY(sv) = new_body;
1211
1212 if (old_type_details->copy) {
1213 /* There is now the potential for an upgrade from something without
1214 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1215 int offset = old_type_details->offset;
1216 int length = old_type_details->copy;
1217
1218 if (new_type_details->offset > old_type_details->offset) {
1219 const int difference
1220 = new_type_details->offset - old_type_details->offset;
1221 offset += difference;
1222 length -= difference;
1223 }
1224 assert (length >= 0);
1225
1226 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1227 char);
1228 }
1229
1230 #ifndef NV_ZERO_IS_ALLBITS_ZERO
1231 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
1232 * correct 0.0 for us. Otherwise, if the old body didn't have an
1233 * NV slot, but the new one does, then we need to initialise the
1234 * freshly created NV slot with whatever the correct bit pattern is
1235 * for 0.0 */
1236 if (old_type_details->zero_nv && !new_type_details->zero_nv
1237 && !isGV_with_GP(sv))
1238 SvNV_set(sv, 0);
1239 #endif
1240
1241 if (UNLIKELY(new_type == SVt_PVIO)) {
1242 IO * const io = MUTABLE_IO(sv);
1243 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
1244
1245 SvOBJECT_on(io);
1246 /* Clear the stashcache because a new IO could overrule a package
1247 name */
1248 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n"));
1249 hv_clear(PL_stashcache);
1250
1251 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
1252 IoPAGE_LEN(sv) = 60;
1253 }
1254 if (old_type < SVt_PV) {
1255 /* referent will be NULL unless the old type was SVt_IV emulating
1256 SVt_RV */
1257 sv->sv_u.svu_rv = referent;
1258 }
1259 break;
1260 default:
1261 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1262 (unsigned long)new_type);
1263 }
1264
1265 /* if this is zero, this is a body-less SVt_NULL, SVt_IV/SVt_RV,
1266 and sometimes SVt_NV */
1267 if (old_type_details->body_size) {
1268 #ifdef PURIFY
1269 safefree(old_body);
1270 #else
1271 /* Note that there is an assumption that all bodies of types that
1272 can be upgraded came from arenas. Only the more complex non-
1273 upgradable types are allowed to be directly malloc()ed. */
1274 assert(old_type_details->arena);
1275 del_body((void*)((char*)old_body + old_type_details->offset),
1276 &PL_body_roots[old_type]);
1277 #endif
1278 }
1279 }
1280
1281 struct xpvhv_aux*
Perl_hv_auxalloc(pTHX_ HV * hv)1282 Perl_hv_auxalloc(pTHX_ HV *hv) {
1283 const struct body_details *old_type_details = bodies_by_type + SVt_PVHV;
1284 void *old_body;
1285 void *new_body;
1286
1287 PERL_ARGS_ASSERT_HV_AUXALLOC;
1288 assert(SvTYPE(hv) == SVt_PVHV);
1289 assert(!HvHasAUX(hv));
1290
1291 #ifdef PURIFY
1292 new_body = new_NOARENAZ(&fake_hv_with_aux);
1293 #else
1294 new_body_from_arena(new_body, HVAUX_ARENA_ROOT_IX, fake_hv_with_aux);
1295 #endif
1296
1297 old_body = SvANY(hv);
1298
1299 Copy((char *)old_body + old_type_details->offset,
1300 (char *)new_body + fake_hv_with_aux.offset,
1301 old_type_details->copy,
1302 char);
1303
1304 #ifdef PURIFY
1305 safefree(old_body);
1306 #else
1307 assert(old_type_details->arena);
1308 del_body((void*)((char*)old_body + old_type_details->offset),
1309 &PL_body_roots[SVt_PVHV]);
1310 #endif
1311
1312 SvANY(hv) = (XPVHV *) new_body;
1313 SvFLAGS(hv) |= SVphv_HasAUX;
1314 return HvAUX(hv);
1315 }
1316
1317 /*
1318 =for apidoc sv_backoff
1319
1320 Remove any string offset. You should normally use the C<SvOOK_off> macro
1321 wrapper instead.
1322
1323 =cut
1324 */
1325
1326 /* prior to 5.000 stable, this function returned the new OOK-less SvFLAGS
1327 prior to 5.23.4 this function always returned 0
1328 */
1329
1330 void
Perl_sv_backoff(SV * const sv)1331 Perl_sv_backoff(SV *const sv)
1332 {
1333 STRLEN delta;
1334 const char * const s = SvPVX_const(sv);
1335
1336 PERL_ARGS_ASSERT_SV_BACKOFF;
1337
1338 assert(SvOOK(sv));
1339 assert(SvTYPE(sv) != SVt_PVHV);
1340 assert(SvTYPE(sv) != SVt_PVAV);
1341
1342 SvOOK_offset(sv, delta);
1343
1344 SvLEN_set(sv, SvLEN(sv) + delta);
1345 SvPV_set(sv, SvPVX(sv) - delta);
1346 SvFLAGS(sv) &= ~SVf_OOK;
1347 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
1348 return;
1349 }
1350
1351
1352 /* forward declaration */
1353 static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
1354
1355
1356 /*
1357 =for apidoc sv_grow
1358
1359 Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1360 upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1361 Use the C<SvGROW> wrapper instead.
1362
1363 =cut
1364 */
1365
1366
1367 char *
Perl_sv_grow(pTHX_ SV * const sv,STRLEN newlen)1368 Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
1369 {
1370 char *s;
1371
1372 PERL_ARGS_ASSERT_SV_GROW;
1373
1374 if (SvROK(sv))
1375 sv_unref(sv);
1376 if (SvTYPE(sv) < SVt_PV) {
1377 sv_upgrade(sv, SVt_PV);
1378 s = SvPVX_mutable(sv);
1379 }
1380 else if (SvOOK(sv)) { /* pv is offset? */
1381 sv_backoff(sv);
1382 s = SvPVX_mutable(sv);
1383 if (newlen > SvLEN(sv))
1384 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
1385 }
1386 else
1387 {
1388 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0);
1389 s = SvPVX_mutable(sv);
1390 }
1391
1392 #ifdef PERL_COPY_ON_WRITE
1393 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
1394 * to store the COW count. So in general, allocate one more byte than
1395 * asked for, to make it likely this byte is always spare: and thus
1396 * make more strings COW-able.
1397 *
1398 * Only increment if the allocation isn't MEM_SIZE_MAX,
1399 * otherwise it will wrap to 0.
1400 */
1401 if ( newlen != MEM_SIZE_MAX )
1402 newlen++;
1403 #endif
1404
1405 #if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size)
1406 #define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1407 #endif
1408
1409 if (newlen > SvLEN(sv)) { /* need more room? */
1410 STRLEN minlen = SvCUR(sv);
1411 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + PERL_STRLEN_NEW_MIN;
1412 if (newlen < minlen)
1413 newlen = minlen;
1414 #ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1415
1416 /* Don't round up on the first allocation, as odds are pretty good that
1417 * the initial request is accurate as to what is really needed */
1418 if (SvLEN(sv)) {
1419 STRLEN rounded = PERL_STRLEN_ROUNDUP(newlen);
1420 if (rounded > newlen)
1421 newlen = rounded;
1422 }
1423 #endif
1424 if (SvLEN(sv) && s) {
1425 s = (char*)saferealloc(s, newlen);
1426 }
1427 else {
1428 s = (char*)safemalloc(newlen);
1429 if (SvPVX_const(sv) && SvCUR(sv)) {
1430 Move(SvPVX_const(sv), s, SvCUR(sv), char);
1431 }
1432 }
1433 SvPV_set(sv, s);
1434 #ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1435 /* Do this here, do it once, do it right, and then we will never get
1436 called back into sv_grow() unless there really is some growing
1437 needed. */
1438 SvLEN_set(sv, Perl_safesysmalloc_size(s));
1439 #else
1440 SvLEN_set(sv, newlen);
1441 #endif
1442 }
1443 return s;
1444 }
1445
1446 /*
1447 =for apidoc sv_grow_fresh
1448
1449 A cut-down version of sv_grow intended only for when sv is a freshly-minted
1450 SVt_PV, SVt_PVIV, SVt_PVNV, or SVt_PVMG. i.e. sv has the default flags, has
1451 never been any other type, and does not have an existing string. Basically,
1452 just assigns a char buffer and returns a pointer to it.
1453
1454 =cut
1455 */
1456
1457
1458 char *
Perl_sv_grow_fresh(pTHX_ SV * const sv,STRLEN newlen)1459 Perl_sv_grow_fresh(pTHX_ SV *const sv, STRLEN newlen)
1460 {
1461 char *s;
1462
1463 PERL_ARGS_ASSERT_SV_GROW_FRESH;
1464
1465 assert(SvTYPE(sv) >= SVt_PV && SvTYPE(sv) <= SVt_PVMG);
1466 assert(!SvROK(sv));
1467 assert(!SvOOK(sv));
1468 assert(!SvIsCOW(sv));
1469 assert(!SvLEN(sv));
1470 assert(!SvCUR(sv));
1471
1472 #ifdef PERL_COPY_ON_WRITE
1473 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
1474 * to store the COW count. So in general, allocate one more byte than
1475 * asked for, to make it likely this byte is always spare: and thus
1476 * make more strings COW-able.
1477 *
1478 * Only increment if the allocation isn't MEM_SIZE_MAX,
1479 * otherwise it will wrap to 0.
1480 */
1481 if ( newlen != MEM_SIZE_MAX )
1482 newlen++;
1483 #endif
1484
1485 if (newlen < PERL_STRLEN_NEW_MIN)
1486 newlen = PERL_STRLEN_NEW_MIN;
1487
1488 s = (char*)safemalloc(newlen);
1489 SvPV_set(sv, s);
1490
1491 /* No PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC here, since many strings */
1492 /* will never be grown once set. Let the real sv_grow worry about that. */
1493 SvLEN_set(sv, newlen);
1494 return s;
1495 }
1496
1497 /*
1498 =for apidoc sv_setiv
1499 =for apidoc_item sv_setiv_mg
1500
1501 These copy an integer into the given SV, upgrading first if necessary.
1502
1503 They differ only in that C<sv_setiv_mg> handles 'set' magic; C<sv_setiv> does
1504 not.
1505
1506 =cut
1507 */
1508
1509 void
Perl_sv_setiv(pTHX_ SV * const sv,const IV i)1510 Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
1511 {
1512 PERL_ARGS_ASSERT_SV_SETIV;
1513
1514 SV_CHECK_THINKFIRST_COW_DROP(sv);
1515 switch (SvTYPE(sv)) {
1516 #if NVSIZE <= IVSIZE
1517 case SVt_NULL:
1518 case SVt_NV:
1519 SET_SVANY_FOR_BODYLESS_IV(sv);
1520 SvFLAGS(sv) &= ~SVTYPEMASK;
1521 SvFLAGS(sv) |= SVt_IV;
1522 break;
1523 #else
1524 case SVt_NULL:
1525 SET_SVANY_FOR_BODYLESS_IV(sv);
1526 SvFLAGS(sv) &= ~SVTYPEMASK;
1527 SvFLAGS(sv) |= SVt_IV;
1528 break;
1529 case SVt_NV:
1530 sv_upgrade(sv, SVt_IV);
1531 break;
1532 #endif
1533 case SVt_PV:
1534 sv_upgrade(sv, SVt_PVIV);
1535 break;
1536
1537 case SVt_PVGV:
1538 if (!isGV_with_GP(sv))
1539 break;
1540 /* FALLTHROUGH */
1541 case SVt_PVAV:
1542 case SVt_PVHV:
1543 case SVt_PVCV:
1544 case SVt_PVFM:
1545 case SVt_PVIO:
1546 /* diag_listed_as: Can't coerce %s to %s in %s */
1547 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1548 OP_DESC(PL_op));
1549 NOT_REACHED; /* NOTREACHED */
1550 break;
1551 default: NOOP;
1552 }
1553 (void)SvIOK_only(sv); /* validate number */
1554 SvIV_set(sv, i);
1555 SvTAINT(sv);
1556 }
1557
1558 void
Perl_sv_setiv_mg(pTHX_ SV * const sv,const IV i)1559 Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i)
1560 {
1561 PERL_ARGS_ASSERT_SV_SETIV_MG;
1562
1563 sv_setiv(sv,i);
1564 SvSETMAGIC(sv);
1565 }
1566
1567 /*
1568 =for apidoc sv_setuv
1569 =for apidoc_item sv_setuv_mg
1570
1571 These copy an unsigned integer into the given SV, upgrading first if necessary.
1572
1573
1574 They differ only in that C<sv_setuv_mg> handles 'set' magic; C<sv_setuv> does
1575 not.
1576
1577 =cut
1578 */
1579
1580 void
Perl_sv_setuv(pTHX_ SV * const sv,const UV u)1581 Perl_sv_setuv(pTHX_ SV *const sv, const UV u)
1582 {
1583 PERL_ARGS_ASSERT_SV_SETUV;
1584
1585 /* With the if statement to ensure that integers are stored as IVs whenever
1586 possible:
1587 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
1588
1589 without
1590 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1591
1592 If you wish to remove the following if statement, so that this routine
1593 (and its callers) always return UVs, please benchmark to see what the
1594 effect is. Modern CPUs may be different. Or may not :-)
1595 */
1596 if (u <= (UV)IV_MAX) {
1597 sv_setiv(sv, (IV)u);
1598 return;
1599 }
1600 sv_setiv(sv, 0);
1601 SvIsUV_on(sv);
1602 SvUV_set(sv, u);
1603 }
1604
1605 void
Perl_sv_setuv_mg(pTHX_ SV * const sv,const UV u)1606 Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u)
1607 {
1608 PERL_ARGS_ASSERT_SV_SETUV_MG;
1609
1610 sv_setuv(sv,u);
1611 SvSETMAGIC(sv);
1612 }
1613
1614 /*
1615 =for apidoc sv_setnv
1616 =for apidoc_item sv_setnv_mg
1617
1618 These copy a double into the given SV, upgrading first if necessary.
1619
1620 They differ only in that C<sv_setnv_mg> handles 'set' magic; C<sv_setnv> does
1621 not.
1622
1623 =cut
1624 */
1625
1626 void
Perl_sv_setnv(pTHX_ SV * const sv,const NV num)1627 Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
1628 {
1629 PERL_ARGS_ASSERT_SV_SETNV;
1630
1631 SV_CHECK_THINKFIRST_COW_DROP(sv);
1632 switch (SvTYPE(sv)) {
1633 case SVt_NULL:
1634 case SVt_IV:
1635 #if NVSIZE <= IVSIZE
1636 SET_SVANY_FOR_BODYLESS_NV(sv);
1637 SvFLAGS(sv) &= ~SVTYPEMASK;
1638 SvFLAGS(sv) |= SVt_NV;
1639 break;
1640 #else
1641 sv_upgrade(sv, SVt_NV);
1642 break;
1643 #endif
1644 case SVt_PV:
1645 case SVt_PVIV:
1646 sv_upgrade(sv, SVt_PVNV);
1647 break;
1648
1649 case SVt_PVGV:
1650 if (!isGV_with_GP(sv))
1651 break;
1652 /* FALLTHROUGH */
1653 case SVt_PVAV:
1654 case SVt_PVHV:
1655 case SVt_PVCV:
1656 case SVt_PVFM:
1657 case SVt_PVIO:
1658 /* diag_listed_as: Can't coerce %s to %s in %s */
1659 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
1660 OP_DESC(PL_op));
1661 NOT_REACHED; /* NOTREACHED */
1662 break;
1663 default: NOOP;
1664 }
1665 SvNV_set(sv, num);
1666 (void)SvNOK_only(sv); /* validate number */
1667 SvTAINT(sv);
1668 }
1669
1670 void
Perl_sv_setnv_mg(pTHX_ SV * const sv,const NV num)1671 Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num)
1672 {
1673 PERL_ARGS_ASSERT_SV_SETNV_MG;
1674
1675 sv_setnv(sv,num);
1676 SvSETMAGIC(sv);
1677 }
1678
1679 /*
1680 =for apidoc sv_setrv_noinc
1681 =for apidoc_item sv_setrv_noinc_mg
1682
1683 Copies an SV pointer into the given SV as an SV reference, upgrading it if
1684 necessary. After this, C<SvRV(sv)> is equal to I<ref>. This does not adjust
1685 the reference count of I<ref>. The reference I<ref> must not be NULL.
1686
1687 C<sv_setrv_noinc_mg> will invoke 'set' magic on the SV; C<sv_setrv_noinc> will
1688 not.
1689
1690 =cut
1691 */
1692
1693 void
Perl_sv_setrv_noinc(pTHX_ SV * const sv,SV * const ref)1694 Perl_sv_setrv_noinc(pTHX_ SV *const sv, SV *const ref)
1695 {
1696 PERL_ARGS_ASSERT_SV_SETRV_NOINC;
1697
1698 SV_CHECK_THINKFIRST_COW_DROP(sv);
1699 prepare_SV_for_RV(sv);
1700
1701 SvOK_off(sv);
1702 SvRV_set(sv, ref);
1703 SvROK_on(sv);
1704 }
1705
1706 void
Perl_sv_setrv_noinc_mg(pTHX_ SV * const sv,SV * const ref)1707 Perl_sv_setrv_noinc_mg(pTHX_ SV *const sv, SV *const ref)
1708 {
1709 PERL_ARGS_ASSERT_SV_SETRV_NOINC_MG;
1710
1711 sv_setrv_noinc(sv, ref);
1712 SvSETMAGIC(sv);
1713 }
1714
1715 /*
1716 =for apidoc sv_setrv_inc
1717 =for apidoc_item sv_setrv_inc_mg
1718
1719 As C<sv_setrv_noinc> but increments the reference count of I<ref>.
1720
1721 C<sv_setrv_inc_mg> will invoke 'set' magic on the SV; C<sv_setrv_inc> will
1722 not.
1723
1724 =cut
1725 */
1726
1727 void
Perl_sv_setrv_inc(pTHX_ SV * const sv,SV * const ref)1728 Perl_sv_setrv_inc(pTHX_ SV *const sv, SV *const ref)
1729 {
1730 PERL_ARGS_ASSERT_SV_SETRV_INC;
1731
1732 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref));
1733 }
1734
1735 void
Perl_sv_setrv_inc_mg(pTHX_ SV * const sv,SV * const ref)1736 Perl_sv_setrv_inc_mg(pTHX_ SV *const sv, SV *const ref)
1737 {
1738 PERL_ARGS_ASSERT_SV_SETRV_INC_MG;
1739
1740 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref));
1741 SvSETMAGIC(sv);
1742 }
1743
1744 /* Return a cleaned-up, printable version of sv, for non-numeric, or
1745 * not incrementable warning display.
1746 * Originally part of S_not_a_number().
1747 * The return value may be != tmpbuf.
1748 */
1749
1750 STATIC const char *
S_sv_display(pTHX_ SV * const sv,char * tmpbuf,STRLEN tmpbuf_size)1751 S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) {
1752 const char *pv;
1753
1754 PERL_ARGS_ASSERT_SV_DISPLAY;
1755
1756 if (DO_UTF8(sv)) {
1757 SV *dsv = newSVpvs_flags("", SVs_TEMP);
1758 pv = sv_uni_display(dsv, sv, 32, UNI_DISPLAY_ISPRINT);
1759 } else {
1760 char *d = tmpbuf;
1761 const char * const limit = tmpbuf + tmpbuf_size - 8;
1762 /* each *s can expand to 4 chars + "...\0",
1763 i.e. need room for 8 chars */
1764
1765 const char *s = SvPVX_const(sv);
1766 const char * const end = s + SvCUR(sv);
1767 for ( ; s < end && d < limit; s++ ) {
1768 int ch = (U8) *s;
1769 if (! isASCII(ch) && !isPRINT_LC(ch)) {
1770 *d++ = 'M';
1771 *d++ = '-';
1772
1773 /* Map to ASCII "equivalent" of Latin1 */
1774 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127);
1775 }
1776 if (ch == '\n') {
1777 *d++ = '\\';
1778 *d++ = 'n';
1779 }
1780 else if (ch == '\r') {
1781 *d++ = '\\';
1782 *d++ = 'r';
1783 }
1784 else if (ch == '\f') {
1785 *d++ = '\\';
1786 *d++ = 'f';
1787 }
1788 else if (ch == '\\') {
1789 *d++ = '\\';
1790 *d++ = '\\';
1791 }
1792 else if (ch == '\0') {
1793 *d++ = '\\';
1794 *d++ = '0';
1795 }
1796 else if (isPRINT_LC(ch))
1797 *d++ = ch;
1798 else {
1799 *d++ = '^';
1800 *d++ = toCTRL(ch);
1801 }
1802 }
1803 if (s < end) {
1804 *d++ = '.';
1805 *d++ = '.';
1806 *d++ = '.';
1807 }
1808 *d = '\0';
1809 pv = tmpbuf;
1810 }
1811
1812 return pv;
1813 }
1814
1815 /* Print an "isn't numeric" warning, using a cleaned-up,
1816 * printable version of the offending string
1817 */
1818
1819 STATIC void
S_not_a_number(pTHX_ SV * const sv)1820 S_not_a_number(pTHX_ SV *const sv)
1821 {
1822 char tmpbuf[64];
1823 const char *pv;
1824
1825 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1826
1827 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1828
1829 if (PL_op)
1830 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1831 /* diag_listed_as: Argument "%s" isn't numeric%s */
1832 "Argument \"%s\" isn't numeric in %s", pv,
1833 OP_DESC(PL_op));
1834 else
1835 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1836 /* diag_listed_as: Argument "%s" isn't numeric%s */
1837 "Argument \"%s\" isn't numeric", pv);
1838 }
1839
1840 STATIC void
S_not_incrementable(pTHX_ SV * const sv)1841 S_not_incrementable(pTHX_ SV *const sv) {
1842 char tmpbuf[64];
1843 const char *pv;
1844
1845 PERL_ARGS_ASSERT_NOT_INCREMENTABLE;
1846
1847 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1848
1849 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1850 "Argument \"%s\" treated as 0 in increment (++)", pv);
1851 }
1852
1853 /*
1854 =for apidoc looks_like_number
1855
1856 Test if the content of an SV looks like a number (or is a number).
1857 C<Inf> and C<Infinity> are treated as numbers (so will not issue a
1858 non-numeric warning), even if your C<atof()> doesn't grok them. Get-magic is
1859 ignored.
1860
1861 =cut
1862 */
1863
1864 I32
Perl_looks_like_number(pTHX_ SV * const sv)1865 Perl_looks_like_number(pTHX_ SV *const sv)
1866 {
1867 const char *sbegin;
1868 STRLEN len;
1869 int numtype;
1870
1871 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1872
1873 if (SvPOK(sv) || SvPOKp(sv)) {
1874 sbegin = SvPV_nomg_const(sv, len);
1875 }
1876 else
1877 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
1878 numtype = grok_number(sbegin, len, NULL);
1879 return ((numtype & IS_NUMBER_TRAILING)) ? 0 : numtype;
1880 }
1881
1882 STATIC bool
S_glob_2number(pTHX_ GV * const gv)1883 S_glob_2number(pTHX_ GV * const gv)
1884 {
1885 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1886
1887 /* We know that all GVs stringify to something that is not-a-number,
1888 so no need to test that. */
1889 if (ckWARN(WARN_NUMERIC))
1890 {
1891 SV *const buffer = sv_newmortal();
1892 gv_efullname3(buffer, gv, "*");
1893 not_a_number(buffer);
1894 }
1895 /* We just want something true to return, so that S_sv_2iuv_common
1896 can tail call us and return true. */
1897 return TRUE;
1898 }
1899
1900 /* Actually, ISO C leaves conversion of UV to IV undefined, but
1901 until proven guilty, assume that things are not that bad... */
1902
1903 /*
1904 NV_PRESERVES_UV:
1905
1906 As 64 bit platforms often have an NV that doesn't preserve all bits of
1907 an IV (an assumption perl has been based on to date) it becomes necessary
1908 to remove the assumption that the NV always carries enough precision to
1909 recreate the IV whenever needed, and that the NV is the canonical form.
1910 Instead, IV/UV and NV need to be given equal rights. So as to not lose
1911 precision as a side effect of conversion (which would lead to insanity
1912 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
1913 1) to distinguish between IV/UV/NV slots that have a valid conversion cached
1914 where precision was lost, and IV/UV/NV slots that have a valid conversion
1915 which has lost no precision
1916 2) to ensure that if a numeric conversion to one form is requested that
1917 would lose precision, the precise conversion (or differently
1918 imprecise conversion) is also performed and cached, to prevent
1919 requests for different numeric formats on the same SV causing
1920 lossy conversion chains. (lossless conversion chains are perfectly
1921 acceptable (still))
1922
1923
1924 flags are used:
1925 SvIOKp is true if the IV slot contains a valid value
1926 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1927 SvNOKp is true if the NV slot contains a valid value
1928 SvNOK is true only if the NV value is accurate
1929
1930 so
1931 while converting from PV to NV, check to see if converting that NV to an
1932 IV(or UV) would lose accuracy over a direct conversion from PV to
1933 IV(or UV). If it would, cache both conversions, return NV, but mark
1934 SV as IOK NOKp (ie not NOK).
1935
1936 While converting from PV to IV, check to see if converting that IV to an
1937 NV would lose accuracy over a direct conversion from PV to NV. If it
1938 would, cache both conversions, flag similarly.
1939
1940 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
1941 correctly because if IV & NV were set NV *always* overruled.
1942 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
1943 changes - now IV and NV together means that the two are interchangeable:
1944 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
1945
1946 The benefit of this is that operations such as pp_add know that if
1947 SvIOK is true for both left and right operands, then integer addition
1948 can be used instead of floating point (for cases where the result won't
1949 overflow). Before, floating point was always used, which could lead to
1950 loss of precision compared with integer addition.
1951
1952 * making IV and NV equal status should make maths accurate on 64 bit
1953 platforms
1954 * may speed up maths somewhat if pp_add and friends start to use
1955 integers when possible instead of fp. (Hopefully the overhead in
1956 looking for SvIOK and checking for overflow will not outweigh the
1957 fp to integer speedup)
1958 * will slow down integer operations (callers of SvIV) on "inaccurate"
1959 values, as the change from SvIOK to SvIOKp will cause a call into
1960 sv_2iv each time rather than a macro access direct to the IV slot
1961 * should speed up number->string conversion on integers as IV is
1962 favoured when IV and NV are equally accurate
1963
1964 ####################################################################
1965 You had better be using SvIOK_notUV if you want an IV for arithmetic:
1966 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
1967 On the other hand, SvUOK is true iff UV.
1968 ####################################################################
1969
1970 Your mileage will vary depending your CPU's relative fp to integer
1971 performance ratio.
1972 */
1973
1974 #ifndef NV_PRESERVES_UV
1975 # define IS_NUMBER_UNDERFLOW_IV 1
1976 # define IS_NUMBER_UNDERFLOW_UV 2
1977 # define IS_NUMBER_IV_AND_UV 2
1978 # define IS_NUMBER_OVERFLOW_IV 4
1979 # define IS_NUMBER_OVERFLOW_UV 5
1980
1981 /* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
1982
1983 /* For sv_2nv these three cases are "SvNOK and don't bother casting" */
1984 STATIC int
S_sv_2iuv_non_preserve(pTHX_ SV * const sv,I32 numtype)1985 S_sv_2iuv_non_preserve(pTHX_ SV *const sv
1986 # ifdef DEBUGGING
1987 , I32 numtype
1988 # endif
1989 )
1990 {
1991 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
1992 PERL_UNUSED_CONTEXT;
1993
1994 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%" UVxf " NV=%" NVgf " inttype=%" UVXf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
1995 if (SvNVX(sv) < (NV)IV_MIN) {
1996 (void)SvIOKp_on(sv);
1997 (void)SvNOK_on(sv);
1998 SvIV_set(sv, IV_MIN);
1999 return IS_NUMBER_UNDERFLOW_IV;
2000 }
2001 if (SvNVX(sv) > (NV)UV_MAX) {
2002 (void)SvIOKp_on(sv);
2003 (void)SvNOK_on(sv);
2004 SvIsUV_on(sv);
2005 SvUV_set(sv, UV_MAX);
2006 return IS_NUMBER_OVERFLOW_UV;
2007 }
2008 (void)SvIOKp_on(sv);
2009 (void)SvNOK_on(sv);
2010 /* Can't use strtol etc to convert this string. (See truth table in
2011 sv_2iv */
2012 if (SvNVX(sv) < IV_MAX_P1) {
2013 SvIV_set(sv, I_V(SvNVX(sv)));
2014 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2015 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
2016 } else {
2017 /* Integer is imprecise. NOK, IOKp */
2018 }
2019 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
2020 }
2021 SvIsUV_on(sv);
2022 SvUV_set(sv, U_V(SvNVX(sv)));
2023 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2024 if (SvUVX(sv) == UV_MAX) {
2025 /* As we know that NVs don't preserve UVs, UV_MAX cannot
2026 possibly be preserved by NV. Hence, it must be overflow.
2027 NOK, IOKp */
2028 return IS_NUMBER_OVERFLOW_UV;
2029 }
2030 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
2031 } else {
2032 /* Integer is imprecise. NOK, IOKp */
2033 }
2034 return IS_NUMBER_OVERFLOW_IV;
2035 }
2036 #endif /* !NV_PRESERVES_UV*/
2037
2038 /* If numtype is infnan, set the NV of the sv accordingly.
2039 * If numtype is anything else, try setting the NV using Atof(PV). */
2040 static void
S_sv_setnv(pTHX_ SV * sv,int numtype)2041 S_sv_setnv(pTHX_ SV* sv, int numtype)
2042 {
2043 bool pok = cBOOL(SvPOK(sv));
2044 bool nok = FALSE;
2045 #ifdef NV_INF
2046 if ((numtype & IS_NUMBER_INFINITY)) {
2047 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -NV_INF : NV_INF);
2048 nok = TRUE;
2049 } else
2050 #endif
2051 #ifdef NV_NAN
2052 if ((numtype & IS_NUMBER_NAN)) {
2053 SvNV_set(sv, NV_NAN);
2054 nok = TRUE;
2055 } else
2056 #endif
2057 if (pok) {
2058 SvNV_set(sv, Atof(SvPVX_const(sv)));
2059 /* Purposefully no true nok here, since we don't want to blow
2060 * away the possible IOK/UV of an existing sv. */
2061 }
2062 if (nok) {
2063 SvNOK_only(sv); /* No IV or UV please, this is pure infnan. */
2064 if (pok)
2065 SvPOK_on(sv); /* PV is okay, though. */
2066 }
2067 }
2068
2069 #ifndef NV_PRESERVES_UV
2070 # define MAX_UV_PRESERVED_IN_NV (((UV)1 << NV_PRESERVES_UV_BITS) - 1)
2071 # define MAX_IV_PRESERVED_IN_NV ((IV)MAX_UV_PRESERVED_IN_NV)
2072 # define MIN_IV_PRESERVED_IN_NV (-MAX_IV_PRESERVED_IN_NV)
2073 /* We presume that (IV)MAX_UV_PRESERVED_IN_NV and (-MAX_IV_PRESERVED_IN_NV)
2074 above will not overflow if the condition below holds true: */
2075 STATIC_ASSERT_DECL(MAX_UV_PRESERVED_IN_NV <= (UV)IV_MAX);
2076 #endif
2077
2078 STATIC bool
S_sv_2iuv_common(pTHX_ SV * const sv)2079 S_sv_2iuv_common(pTHX_ SV *const sv)
2080 {
2081 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
2082
2083 if (SvNOKp(sv)) {
2084 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
2085 * without also getting a cached IV/UV from it at the same time
2086 * (ie PV->NV conversion should detect loss of accuracy and cache
2087 * IV or UV at same time to avoid this. */
2088 /* IV-over-UV optimisation - choose to cache IV if possible */
2089
2090 if (SvTYPE(sv) == SVt_NV)
2091 sv_upgrade(sv, SVt_PVNV);
2092
2093 got_nv:
2094 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
2095 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
2096 certainly cast into the IV range at IV_MAX, whereas the correct
2097 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
2098 cases go to UV */
2099 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2100 if (Perl_isnan(SvNVX(sv))) {
2101 SvUV_set(sv, 0);
2102 SvIsUV_on(sv);
2103 return FALSE;
2104 }
2105 #endif
2106 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2107 SvIV_set(sv, I_V(SvNVX(sv)));
2108 if (SvNVX(sv) == (NV) SvIVX(sv)
2109 #ifndef NV_PRESERVES_UV
2110 /* Optimizing compilers might merge two comparisons below
2111 into single comparison */
2112 && MIN_IV_PRESERVED_IN_NV <= SvIVX(sv)
2113 && SvIVX(sv) <= MAX_IV_PRESERVED_IN_NV
2114 /* Don't flag it as "accurately an integer" if the number
2115 came from a (by definition imprecise) NV operation, and
2116 we're outside the range of NV integer precision */
2117 #endif
2118 ) {
2119 if (SvNOK(sv))
2120 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2121 else {
2122 /* scalar has trailing garbage, eg "42a" */
2123 }
2124 DEBUG_c(PerlIO_printf(Perl_debug_log,
2125 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (precise)\n",
2126 PTR2UV(sv),
2127 SvNVX(sv),
2128 SvIVX(sv)));
2129
2130 } else {
2131 /* IV not precise. No need to convert from PV, as NV
2132 conversion would already have cached IV if it detected
2133 that PV->IV would be better than PV->NV->IV
2134 flags already correct - don't set public IOK. */
2135 DEBUG_c(PerlIO_printf(Perl_debug_log,
2136 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (imprecise)\n",
2137 PTR2UV(sv),
2138 SvNVX(sv),
2139 SvIVX(sv)));
2140 }
2141 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2142 but the cast (NV)IV_MIN rounds to a the value less (more
2143 negative) than IV_MIN which happens to be equal to SvNVX ??
2144 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2145 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2146 (NV)UVX == NVX are both true, but the values differ. :-(
2147 Hopefully for 2s complement IV_MIN is something like
2148 0x8000000000000000 which will be exact. NWC */
2149 }
2150 else {
2151 SvUV_set(sv, U_V(SvNVX(sv)));
2152 if (
2153 (SvNVX(sv) == (NV) SvUVX(sv))
2154 #ifndef NV_PRESERVES_UV
2155 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2156 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2157 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2158 /* Don't flag it as "accurately an integer" if the number
2159 came from a (by definition imprecise) NV operation, and
2160 we're outside the range of NV integer precision */
2161 #endif
2162 && SvNOK(sv)
2163 )
2164 SvIOK_on(sv);
2165 SvIsUV_on(sv);
2166 DEBUG_c(PerlIO_printf(Perl_debug_log,
2167 "0x%" UVxf " 2iv(%" UVuf " => %" IVdf ") (as unsigned)\n",
2168 PTR2UV(sv),
2169 SvUVX(sv),
2170 SvUVX(sv)));
2171 }
2172 }
2173 else if (SvPOKp(sv)) {
2174 UV value;
2175 int numtype;
2176 const char *s = SvPVX_const(sv);
2177 const STRLEN cur = SvCUR(sv);
2178
2179 /* short-cut for a single digit string like "1" */
2180
2181 if (cur == 1) {
2182 char c = *s;
2183 if (isDIGIT(c)) {
2184 if (SvTYPE(sv) < SVt_PVIV)
2185 sv_upgrade(sv, SVt_PVIV);
2186 (void)SvIOK_on(sv);
2187 SvIV_set(sv, (IV)(c - '0'));
2188 return FALSE;
2189 }
2190 }
2191
2192 numtype = grok_number(s, cur, &value);
2193 /* We want to avoid a possible problem when we cache an IV/ a UV which
2194 may be later translated to an NV, and the resulting NV is not
2195 the same as the direct translation of the initial string
2196 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2197 be careful to ensure that the value with the .456 is around if the
2198 NV value is requested in the future).
2199
2200 This means that if we cache such an IV/a UV, we need to cache the
2201 NV as well. Moreover, we trade speed for space, and do not
2202 cache the NV if we are sure it's not needed.
2203 */
2204
2205 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2206 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2207 == IS_NUMBER_IN_UV) {
2208 /* It's definitely an integer, only upgrade to PVIV */
2209 if (SvTYPE(sv) < SVt_PVIV)
2210 sv_upgrade(sv, SVt_PVIV);
2211 (void)SvIOK_on(sv);
2212 } else if (SvTYPE(sv) < SVt_PVNV)
2213 sv_upgrade(sv, SVt_PVNV);
2214
2215 if ((numtype & (IS_NUMBER_INFINITY | IS_NUMBER_NAN))) {
2216 if (ckWARN(WARN_NUMERIC) && ((numtype & IS_NUMBER_TRAILING)))
2217 not_a_number(sv);
2218 S_sv_setnv(aTHX_ sv, numtype);
2219 goto got_nv; /* Fill IV/UV slot and set IOKp */
2220 }
2221
2222 /* If NVs preserve UVs then we only use the UV value if we know that
2223 we aren't going to call atof() below. If NVs don't preserve UVs
2224 then the value returned may have more precision than atof() will
2225 return, even though value isn't perfectly accurate. */
2226 if ((numtype & (IS_NUMBER_IN_UV
2227 #ifdef NV_PRESERVES_UV
2228 | IS_NUMBER_NOT_INT
2229 #endif
2230 )) == IS_NUMBER_IN_UV) {
2231 /* This won't turn off the public IOK flag if it was set above */
2232 (void)SvIOKp_on(sv);
2233
2234 if (!(numtype & IS_NUMBER_NEG)) {
2235 /* positive */;
2236 if (value <= (UV)IV_MAX) {
2237 SvIV_set(sv, (IV)value);
2238 } else {
2239 /* it didn't overflow, and it was positive. */
2240 SvUV_set(sv, value);
2241 SvIsUV_on(sv);
2242 }
2243 } else {
2244 /* 2s complement assumption */
2245 if (value <= (UV)IV_MIN) {
2246 SvIV_set(sv, value == (UV)IV_MIN
2247 ? IV_MIN : -(IV)value);
2248 } else {
2249 /* Too negative for an IV. This is a double upgrade, but
2250 I'm assuming it will be rare. */
2251 if (SvTYPE(sv) < SVt_PVNV)
2252 sv_upgrade(sv, SVt_PVNV);
2253 SvNOK_on(sv);
2254 SvIOK_off(sv);
2255 SvIOKp_on(sv);
2256 SvNV_set(sv, -(NV)value);
2257 SvIV_set(sv, IV_MIN);
2258 }
2259 }
2260 }
2261 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2262 will be in the previous block to set the IV slot, and the next
2263 block to set the NV slot. So no else here. */
2264
2265 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2266 != IS_NUMBER_IN_UV) {
2267 /* It wasn't an (integer that doesn't overflow the UV). */
2268 S_sv_setnv(aTHX_ sv, numtype);
2269
2270 if (! numtype && ckWARN(WARN_NUMERIC))
2271 not_a_number(sv);
2272
2273 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" NVgf ")\n",
2274 PTR2UV(sv), SvNVX(sv)));
2275
2276 #ifdef NV_PRESERVES_UV
2277 SvNOKp_on(sv);
2278 if (numtype)
2279 SvNOK_on(sv);
2280 goto got_nv; /* Fill IV/UV slot and set IOKp, maybe IOK */
2281 #else /* NV_PRESERVES_UV */
2282 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2283 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
2284 /* The IV/UV slot will have been set from value returned by
2285 grok_number above. The NV slot has just been set using
2286 Atof. */
2287 SvNOK_on(sv);
2288 assert (SvIOKp(sv));
2289 } else {
2290 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2291 U_V(Perl_fabs(SvNVX(sv)))) {
2292 /* Small enough to preserve all bits. */
2293 (void)SvIOKp_on(sv);
2294 SvNOK_on(sv);
2295 SvIV_set(sv, I_V(SvNVX(sv)));
2296 if ((NV)(SvIVX(sv)) == SvNVX(sv))
2297 SvIOK_on(sv);
2298 /* There had been runtime checking for
2299 "U_V(Perl_fabs(SvNVX(sv))) < (UV)IV_MAX" here to ensure
2300 that this NV is in the preserved range, but this should
2301 be always true if the following assertion is true: */
2302 STATIC_ASSERT_STMT(((UV)1 << NV_PRESERVES_UV_BITS) <=
2303 (UV)IV_MAX);
2304 } else {
2305 /* IN_UV NOT_INT
2306 0 0 already failed to read UV.
2307 0 1 already failed to read UV.
2308 1 0 you won't get here in this case. IV/UV
2309 slot set, public IOK, Atof() unneeded.
2310 1 1 already read UV.
2311 so there's no point in sv_2iuv_non_preserve() attempting
2312 to use atol, strtol, strtoul etc. */
2313 # ifdef DEBUGGING
2314 sv_2iuv_non_preserve (sv, numtype);
2315 # else
2316 sv_2iuv_non_preserve (sv);
2317 # endif
2318 }
2319 }
2320 /* It might be more code efficient to go through the entire logic above
2321 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2322 gets complex and potentially buggy, so more programmer efficient
2323 to do it this way, by turning off the public flags: */
2324 if (!numtype)
2325 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
2326 #endif /* NV_PRESERVES_UV */
2327 }
2328 }
2329 else {
2330 if (isGV_with_GP(sv))
2331 return glob_2number(MUTABLE_GV(sv));
2332
2333 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2334 report_uninit(sv);
2335 if (SvTYPE(sv) < SVt_IV)
2336 /* Typically the caller expects that sv_any is not NULL now. */
2337 sv_upgrade(sv, SVt_IV);
2338 /* Return 0 from the caller. */
2339 return TRUE;
2340 }
2341 return FALSE;
2342 }
2343
2344 /*
2345 =for apidoc sv_2iv_flags
2346
2347 Return the integer value of an SV, doing any necessary string
2348 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
2349 Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2350
2351 =cut
2352 */
2353
2354 IV
Perl_sv_2iv_flags(pTHX_ SV * const sv,const I32 flags)2355 Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags)
2356 {
2357 PERL_ARGS_ASSERT_SV_2IV_FLAGS;
2358
2359 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2360 && SvTYPE(sv) != SVt_PVFM);
2361
2362 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2363 mg_get(sv);
2364
2365 if (SvROK(sv)) {
2366 if (SvAMAGIC(sv)) {
2367 SV * tmpstr;
2368 if (flags & SV_SKIP_OVERLOAD)
2369 return 0;
2370 tmpstr = AMG_CALLunary(sv, numer_amg);
2371 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2372 return SvIV(tmpstr);
2373 }
2374 }
2375 return PTR2IV(SvRV(sv));
2376 }
2377
2378 if (SvVALID(sv) || isREGEXP(sv)) {
2379 /* FBMs use the space for SvIVX and SvNVX for other purposes, so
2380 must not let them cache IVs.
2381 In practice they are extremely unlikely to actually get anywhere
2382 accessible by user Perl code - the only way that I'm aware of is when
2383 a constant subroutine which is used as the second argument to index.
2384
2385 Regexps have no SvIVX and SvNVX fields.
2386 */
2387 assert(SvPOKp(sv));
2388 {
2389 UV value;
2390 const char * const ptr =
2391 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
2392 const int numtype
2393 = grok_number(ptr, SvCUR(sv), &value);
2394
2395 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2396 == IS_NUMBER_IN_UV) {
2397 /* It's definitely an integer */
2398 if (numtype & IS_NUMBER_NEG) {
2399 if (value < (UV)IV_MIN)
2400 return -(IV)value;
2401 } else {
2402 if (value < (UV)IV_MAX)
2403 return (IV)value;
2404 }
2405 }
2406
2407 /* Quite wrong but no good choices. */
2408 if ((numtype & IS_NUMBER_INFINITY)) {
2409 return (numtype & IS_NUMBER_NEG) ? IV_MIN : IV_MAX;
2410 } else if ((numtype & IS_NUMBER_NAN)) {
2411 return 0; /* So wrong. */
2412 }
2413
2414 if (!numtype) {
2415 if (ckWARN(WARN_NUMERIC))
2416 not_a_number(sv);
2417 }
2418 return I_V(Atof(ptr));
2419 }
2420 }
2421
2422 if (SvTHINKFIRST(sv)) {
2423 if (SvREADONLY(sv) && !SvOK(sv)) {
2424 if (ckWARN(WARN_UNINITIALIZED))
2425 report_uninit(sv);
2426 return 0;
2427 }
2428 }
2429
2430 if (!SvIOKp(sv)) {
2431 if (S_sv_2iuv_common(aTHX_ sv))
2432 return 0;
2433 }
2434
2435 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" IVdf ")\n",
2436 PTR2UV(sv),SvIVX(sv)));
2437 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
2438 }
2439
2440 /*
2441 =for apidoc sv_2uv_flags
2442
2443 Return the unsigned integer value of an SV, doing any necessary string
2444 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
2445 Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
2446
2447 =for apidoc Amnh||SV_GMAGIC
2448
2449 =cut
2450 */
2451
2452 UV
Perl_sv_2uv_flags(pTHX_ SV * const sv,const I32 flags)2453 Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags)
2454 {
2455 PERL_ARGS_ASSERT_SV_2UV_FLAGS;
2456
2457 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2458 mg_get(sv);
2459
2460 if (SvROK(sv)) {
2461 if (SvAMAGIC(sv)) {
2462 SV *tmpstr;
2463 if (flags & SV_SKIP_OVERLOAD)
2464 return 0;
2465 tmpstr = AMG_CALLunary(sv, numer_amg);
2466 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2467 return SvUV(tmpstr);
2468 }
2469 }
2470 return PTR2UV(SvRV(sv));
2471 }
2472
2473 if (SvVALID(sv) || isREGEXP(sv)) {
2474 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
2475 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2476 Regexps have no SvIVX and SvNVX fields. */
2477 assert(SvPOKp(sv));
2478 {
2479 UV value;
2480 const char * const ptr =
2481 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
2482 const int numtype
2483 = grok_number(ptr, SvCUR(sv), &value);
2484
2485 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2486 == IS_NUMBER_IN_UV) {
2487 /* It's definitely an integer */
2488 if (!(numtype & IS_NUMBER_NEG))
2489 return value;
2490 }
2491
2492 /* Quite wrong but no good choices. */
2493 if ((numtype & IS_NUMBER_INFINITY)) {
2494 return UV_MAX; /* So wrong. */
2495 } else if ((numtype & IS_NUMBER_NAN)) {
2496 return 0; /* So wrong. */
2497 }
2498
2499 if (!numtype) {
2500 if (ckWARN(WARN_NUMERIC))
2501 not_a_number(sv);
2502 }
2503 return U_V(Atof(ptr));
2504 }
2505 }
2506
2507 if (SvTHINKFIRST(sv)) {
2508 if (SvREADONLY(sv) && !SvOK(sv)) {
2509 if (ckWARN(WARN_UNINITIALIZED))
2510 report_uninit(sv);
2511 return 0;
2512 }
2513 }
2514
2515 if (!SvIOKp(sv)) {
2516 if (S_sv_2iuv_common(aTHX_ sv))
2517 return 0;
2518 }
2519
2520 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2uv(%" UVuf ")\n",
2521 PTR2UV(sv),SvUVX(sv)));
2522 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
2523 }
2524
2525 /*
2526 =for apidoc sv_2nv_flags
2527
2528 Return the num value of an SV, doing any necessary string or integer
2529 conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
2530 Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
2531
2532 =cut
2533 */
2534
2535 NV
Perl_sv_2nv_flags(pTHX_ SV * const sv,const I32 flags)2536 Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
2537 {
2538 PERL_ARGS_ASSERT_SV_2NV_FLAGS;
2539
2540 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2541 && SvTYPE(sv) != SVt_PVFM);
2542 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) {
2543 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
2544 the same flag bit as SVf_IVisUV, so must not let them cache NVs.
2545 Regexps have no SvIVX and SvNVX fields. */
2546 const char *ptr;
2547 if (flags & SV_GMAGIC)
2548 mg_get(sv);
2549 if (SvNOKp(sv))
2550 return SvNVX(sv);
2551 if (SvPOKp(sv) && !SvIOKp(sv)) {
2552 ptr = SvPVX_const(sv);
2553 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
2554 !grok_number(ptr, SvCUR(sv), NULL))
2555 not_a_number(sv);
2556 return Atof(ptr);
2557 }
2558 if (SvIOKp(sv)) {
2559 if (SvIsUV(sv))
2560 return (NV)SvUVX(sv);
2561 else
2562 return (NV)SvIVX(sv);
2563 }
2564 if (SvROK(sv)) {
2565 goto return_rok;
2566 }
2567 assert(SvTYPE(sv) >= SVt_PVMG);
2568 /* This falls through to the report_uninit near the end of the
2569 function. */
2570 } else if (SvTHINKFIRST(sv)) {
2571 if (SvROK(sv)) {
2572 return_rok:
2573 if (SvAMAGIC(sv)) {
2574 SV *tmpstr;
2575 if (flags & SV_SKIP_OVERLOAD)
2576 return 0;
2577 tmpstr = AMG_CALLunary(sv, numer_amg);
2578 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2579 return SvNV(tmpstr);
2580 }
2581 }
2582 return PTR2NV(SvRV(sv));
2583 }
2584 if (SvREADONLY(sv) && !SvOK(sv)) {
2585 if (ckWARN(WARN_UNINITIALIZED))
2586 report_uninit(sv);
2587 return 0.0;
2588 }
2589 }
2590 if (SvTYPE(sv) < SVt_NV) {
2591 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2592 sv_upgrade(sv, SVt_NV);
2593 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
2594 DEBUG_c({
2595 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2596 STORE_LC_NUMERIC_SET_STANDARD();
2597 PerlIO_printf(Perl_debug_log,
2598 "0x%" UVxf " num(%" NVgf ")\n",
2599 PTR2UV(sv), SvNVX(sv));
2600 RESTORE_LC_NUMERIC();
2601 });
2602 CLANG_DIAG_RESTORE_STMT;
2603
2604 }
2605 else if (SvTYPE(sv) < SVt_PVNV)
2606 sv_upgrade(sv, SVt_PVNV);
2607 if (SvNOKp(sv)) {
2608 return SvNVX(sv);
2609 }
2610 if (SvIOKp(sv)) {
2611 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
2612 #ifdef NV_PRESERVES_UV
2613 if (SvIOK(sv))
2614 SvNOK_on(sv);
2615 else
2616 SvNOKp_on(sv);
2617 #else
2618 /* Only set the public NV OK flag if this NV preserves the IV */
2619 /* Check it's not 0xFFFFFFFFFFFFFFFF */
2620 if (SvIOK(sv) &&
2621 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
2622 : (SvIVX(sv) == I_V(SvNVX(sv))))
2623 SvNOK_on(sv);
2624 else
2625 SvNOKp_on(sv);
2626 #endif
2627 }
2628 else if (SvPOKp(sv)) {
2629 UV value;
2630 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2631 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
2632 not_a_number(sv);
2633 #ifdef NV_PRESERVES_UV
2634 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2635 == IS_NUMBER_IN_UV) {
2636 /* It's definitely an integer */
2637 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
2638 } else {
2639 S_sv_setnv(aTHX_ sv, numtype);
2640 }
2641 if (numtype)
2642 SvNOK_on(sv);
2643 else
2644 SvNOKp_on(sv);
2645 #else
2646 SvNV_set(sv, Atof(SvPVX_const(sv)));
2647 /* Only set the public NV OK flag if this NV preserves the value in
2648 the PV at least as well as an IV/UV would.
2649 Not sure how to do this 100% reliably. */
2650 /* if that shift count is out of range then Configure's test is
2651 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2652 UV_BITS */
2653 if (((UV)1 << NV_PRESERVES_UV_BITS) > U_V(Perl_fabs(SvNVX(sv)))) {
2654 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
2655 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2656 /* Can't use strtol etc to convert this string, so don't try.
2657 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
2658 SvNOK_on(sv);
2659 } else {
2660 /* value has been set. It may not be precise. */
2661 if ((numtype & IS_NUMBER_NEG) && (value >= (UV)IV_MIN)) {
2662 /* 2s complement assumption for (UV)IV_MIN */
2663 SvNOK_on(sv); /* Integer is too negative. */
2664 } else {
2665 SvNOKp_on(sv);
2666 SvIOKp_on(sv);
2667
2668 if (numtype & IS_NUMBER_NEG) {
2669 /* -IV_MIN is undefined, but we should never reach
2670 * this point with both IS_NUMBER_NEG and value ==
2671 * (UV)IV_MIN */
2672 assert(value != (UV)IV_MIN);
2673 SvIV_set(sv, -(IV)value);
2674 } else if (value <= (UV)IV_MAX) {
2675 SvIV_set(sv, (IV)value);
2676 } else {
2677 SvUV_set(sv, value);
2678 SvIsUV_on(sv);
2679 }
2680
2681 if (numtype & IS_NUMBER_NOT_INT) {
2682 /* I believe that even if the original PV had decimals,
2683 they are lost beyond the limit of the FP precision.
2684 However, neither is canonical, so both only get p
2685 flags. NWC, 2000/11/25 */
2686 /* Both already have p flags, so do nothing */
2687 } else {
2688 const NV nv = SvNVX(sv);
2689 /* XXX should this spot have NAN_COMPARE_BROKEN, too? */
2690 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2691 if (SvIVX(sv) == I_V(nv)) {
2692 SvNOK_on(sv);
2693 } else {
2694 /* It had no "." so it must be integer. */
2695 }
2696 SvIOK_on(sv);
2697 } else {
2698 /* between IV_MAX and NV(UV_MAX).
2699 Could be slightly > UV_MAX */
2700
2701 if (numtype & IS_NUMBER_NOT_INT) {
2702 /* UV and NV both imprecise. */
2703 } else {
2704 const UV nv_as_uv = U_V(nv);
2705
2706 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2707 SvNOK_on(sv);
2708 }
2709 SvIOK_on(sv);
2710 }
2711 }
2712 }
2713 }
2714 }
2715 /* It might be more code efficient to go through the entire logic above
2716 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2717 gets complex and potentially buggy, so more programmer efficient
2718 to do it this way, by turning off the public flags: */
2719 if (!numtype)
2720 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
2721 #endif /* NV_PRESERVES_UV */
2722 }
2723 else {
2724 if (isGV_with_GP(sv)) {
2725 glob_2number(MUTABLE_GV(sv));
2726 return 0.0;
2727 }
2728
2729 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2730 report_uninit(sv);
2731 assert (SvTYPE(sv) >= SVt_NV);
2732 /* Typically the caller expects that sv_any is not NULL now. */
2733 /* XXX Ilya implies that this is a bug in callers that assume this
2734 and ideally should be fixed. */
2735 return 0.0;
2736 }
2737 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
2738 DEBUG_c({
2739 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2740 STORE_LC_NUMERIC_SET_STANDARD();
2741 PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n",
2742 PTR2UV(sv), SvNVX(sv));
2743 RESTORE_LC_NUMERIC();
2744 });
2745 CLANG_DIAG_RESTORE_STMT;
2746 return SvNVX(sv);
2747 }
2748
2749 /*
2750 =for apidoc sv_2num
2751
2752 Return an SV with the numeric value of the source SV, doing any necessary
2753 reference or overload conversion. The caller is expected to have handled
2754 get-magic already.
2755
2756 =cut
2757 */
2758
2759 SV *
Perl_sv_2num(pTHX_ SV * const sv)2760 Perl_sv_2num(pTHX_ SV *const sv)
2761 {
2762 PERL_ARGS_ASSERT_SV_2NUM;
2763
2764 if (!SvROK(sv))
2765 return sv;
2766 if (SvAMAGIC(sv)) {
2767 SV * const tmpsv = AMG_CALLunary(sv, numer_amg);
2768 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
2769 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2770 return sv_2num(tmpsv);
2771 }
2772 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2773 }
2774
2775 /* int2str_table: lookup table containing string representations of all
2776 * two digit numbers. For example, int2str_table.arr[0] is "00" and
2777 * int2str_table.arr[12*2] is "12".
2778 *
2779 * We are going to read two bytes at a time, so we have to ensure that
2780 * the array is aligned to a 2 byte boundary. That's why it was made a
2781 * union with a dummy U16 member. */
2782 static const union {
2783 char arr[200];
2784 U16 dummy;
2785 } int2str_table = {{
2786 '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6',
2787 '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3',
2788 '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0',
2789 '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7',
2790 '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
2791 '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1',
2792 '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8',
2793 '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5',
2794 '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2',
2795 '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
2796 '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6',
2797 '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3',
2798 '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0',
2799 '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7',
2800 '9', '8', '9', '9'
2801 }};
2802
2803 /* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2804 * UV as a string towards the end of buf, and return pointers to start and
2805 * end of it.
2806 *
2807 * We assume that buf is at least TYPE_CHARS(UV) long.
2808 */
2809
2810 PERL_STATIC_INLINE char *
S_uiv_2buf(char * const buf,const IV iv,UV uv,const int is_uv,char ** const peob)2811 S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
2812 {
2813 char *ptr = buf + TYPE_CHARS(UV);
2814 char * const ebuf = ptr;
2815 int sign;
2816 U16 *word_ptr, *word_table;
2817
2818 PERL_ARGS_ASSERT_UIV_2BUF;
2819
2820 /* ptr has to be properly aligned, because we will cast it to U16* */
2821 assert(PTR2nat(ptr) % 2 == 0);
2822 /* we are going to read/write two bytes at a time */
2823 word_ptr = (U16*)ptr;
2824 word_table = (U16*)int2str_table.arr;
2825
2826 if (UNLIKELY(is_uv))
2827 sign = 0;
2828 else if (iv >= 0) {
2829 uv = iv;
2830 sign = 0;
2831 } else {
2832 /* Using 0- here to silence bogus warning from MS VC */
2833 uv = (UV) (0 - (UV) iv);
2834 sign = 1;
2835 }
2836
2837 while (uv > 99) {
2838 *--word_ptr = word_table[uv % 100];
2839 uv /= 100;
2840 }
2841 ptr = (char*)word_ptr;
2842
2843 if (uv < 10)
2844 *--ptr = (char)uv + '0';
2845 else {
2846 *--word_ptr = word_table[uv];
2847 ptr = (char*)word_ptr;
2848 }
2849
2850 if (sign)
2851 *--ptr = '-';
2852
2853 *peob = ebuf;
2854 return ptr;
2855 }
2856
2857 /* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an
2858 * infinity or a not-a-number, writes the appropriate strings to the
2859 * buffer, including a zero byte. On success returns the written length,
2860 * excluding the zero byte, on failure (not an infinity, not a nan)
2861 * returns zero, assert-fails on maxlen being too short.
2862 *
2863 * XXX for "Inf", "-Inf", and "NaN", we could have three read-only
2864 * shared string constants we point to, instead of generating a new
2865 * string for each instance. */
2866 STATIC size_t
S_infnan_2pv(NV nv,char * buffer,size_t maxlen,char plus)2867 S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) {
2868 char* s = buffer;
2869 assert(maxlen >= 4);
2870 if (Perl_isinf(nv)) {
2871 if (nv < 0) {
2872 if (maxlen < 5) /* "-Inf\0" */
2873 return 0;
2874 *s++ = '-';
2875 } else if (plus) {
2876 *s++ = '+';
2877 }
2878 *s++ = 'I';
2879 *s++ = 'n';
2880 *s++ = 'f';
2881 }
2882 else if (Perl_isnan(nv)) {
2883 *s++ = 'N';
2884 *s++ = 'a';
2885 *s++ = 'N';
2886 /* XXX optionally output the payload mantissa bits as
2887 * "(unsigned)" (to match the nan("...") C99 function,
2888 * or maybe as "(0xhhh...)" would make more sense...
2889 * provide a format string so that the user can decide?
2890 * NOTE: would affect the maxlen and assert() logic.*/
2891 }
2892 else {
2893 return 0;
2894 }
2895 assert((s == buffer + 3) || (s == buffer + 4));
2896 *s = 0;
2897 return s - buffer;
2898 }
2899
2900 /*
2901 =for apidoc sv_2pv
2902 =for apidoc_item sv_2pv_flags
2903
2904 These implement the various forms of the L<perlapi/C<SvPV>> macros.
2905 The macros are the preferred interface.
2906
2907 These return a pointer to the string value of an SV (coercing it to a string if
2908 necessary), and set C<*lp> to its length in bytes.
2909
2910 The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and
2911 C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains
2912 C<SV_GMAGIC>.
2913
2914 =for apidoc Amnh||SV_GMAGIC
2915
2916 =cut
2917 */
2918
2919 char *
Perl_sv_2pv_flags(pTHX_ SV * const sv,STRLEN * const lp,const U32 flags)2920 Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags)
2921 {
2922 char *s;
2923 bool done_gmagic = FALSE;
2924
2925 PERL_ARGS_ASSERT_SV_2PV_FLAGS;
2926
2927 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2928 && SvTYPE(sv) != SVt_PVFM);
2929 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) {
2930 mg_get(sv);
2931 done_gmagic = TRUE;
2932 }
2933
2934 if (SvROK(sv)) {
2935 if (SvAMAGIC(sv)) {
2936 SV *tmpstr;
2937 SV *nsv= (SV *)sv;
2938 if (flags & SV_SKIP_OVERLOAD)
2939 return NULL;
2940 if (done_gmagic)
2941 nsv = sv_mortalcopy_flags(sv,0);
2942 tmpstr = AMG_CALLunary(nsv, string_amg);
2943 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
2944 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(nsv)))) {
2945 /* Unwrap this: */
2946 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2947 */
2948
2949 char *pv;
2950 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
2951 if (flags & SV_CONST_RETURN) {
2952 pv = (char *) SvPVX_const(tmpstr);
2953 } else {
2954 pv = (flags & SV_MUTABLE_RETURN)
2955 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
2956 }
2957 if (lp)
2958 *lp = SvCUR(tmpstr);
2959 } else {
2960 pv = sv_2pv_flags(tmpstr, lp, flags);
2961 }
2962 if (SvUTF8(tmpstr))
2963 SvUTF8_on(sv);
2964 else
2965 SvUTF8_off(sv);
2966 return pv;
2967 }
2968 }
2969 {
2970 STRLEN len;
2971 char *retval;
2972 char *buffer;
2973 SV *const referent = SvRV(sv);
2974
2975 if (!referent) {
2976 len = 7;
2977 retval = buffer = savepvn("NULLREF", len);
2978 } else if (SvTYPE(referent) == SVt_REGEXP &&
2979 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) ||
2980 amagic_is_enabled(string_amg))) {
2981 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
2982
2983 assert(re);
2984
2985 /* If the regex is UTF-8 we want the containing scalar to
2986 have an UTF-8 flag too */
2987 if (RX_UTF8(re))
2988 SvUTF8_on(sv);
2989 else
2990 SvUTF8_off(sv);
2991
2992 if (lp)
2993 *lp = RX_WRAPLEN(re);
2994
2995 return RX_WRAPPED(re);
2996 } else {
2997 const char *const typestring = sv_reftype(referent, 0);
2998 const STRLEN typelen = strlen(typestring);
2999 UV addr = PTR2UV(referent);
3000 const char *stashname = NULL;
3001 STRLEN stashnamelen = 0; /* hush, gcc */
3002 const char *buffer_end;
3003
3004 if (SvOBJECT(referent)) {
3005 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
3006
3007 if (name) {
3008 stashname = HEK_KEY(name);
3009 stashnamelen = HEK_LEN(name);
3010
3011 if (HEK_UTF8(name)) {
3012 SvUTF8_on(sv);
3013 } else {
3014 SvUTF8_off(sv);
3015 }
3016 } else {
3017 stashname = "__ANON__";
3018 stashnamelen = 8;
3019 }
3020 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
3021 + 2 * sizeof(UV) + 2 /* )\0 */;
3022 } else {
3023 len = typelen + 3 /* (0x */
3024 + 2 * sizeof(UV) + 2 /* )\0 */;
3025 }
3026
3027 Newx(buffer, len, char);
3028 buffer_end = retval = buffer + len;
3029
3030 /* Working backwards */
3031 *--retval = '\0';
3032 *--retval = ')';
3033 do {
3034 *--retval = PL_hexdigit[addr & 15];
3035 } while (addr >>= 4);
3036 *--retval = 'x';
3037 *--retval = '0';
3038 *--retval = '(';
3039
3040 retval -= typelen;
3041 memcpy(retval, typestring, typelen);
3042
3043 if (stashname) {
3044 *--retval = '=';
3045 retval -= stashnamelen;
3046 memcpy(retval, stashname, stashnamelen);
3047 }
3048 /* retval may not necessarily have reached the start of the
3049 buffer here. */
3050 assert (retval >= buffer);
3051
3052 len = buffer_end - retval - 1; /* -1 for that \0 */
3053 }
3054 if (lp)
3055 *lp = len;
3056 SAVEFREEPV(buffer);
3057 return retval;
3058 }
3059 }
3060
3061 if (SvPOKp(sv)) {
3062 if (lp)
3063 *lp = SvCUR(sv);
3064 if (flags & SV_MUTABLE_RETURN)
3065 return SvPVX_mutable(sv);
3066 if (flags & SV_CONST_RETURN)
3067 return (char *)SvPVX_const(sv);
3068 return SvPVX(sv);
3069 }
3070
3071 if (SvIOK(sv)) {
3072 /* I'm assuming that if both IV and NV are equally valid then
3073 converting the IV is going to be more efficient */
3074 const U32 isUIOK = SvIsUV(sv);
3075 /* The purpose of this union is to ensure that arr is aligned on
3076 a 2 byte boundary, because that is what uiv_2buf() requires */
3077 union {
3078 char arr[TYPE_CHARS(UV)];
3079 U16 dummy;
3080 } buf;
3081 char *ebuf, *ptr;
3082 STRLEN len;
3083
3084 if (SvTYPE(sv) < SVt_PVIV)
3085 sv_upgrade(sv, SVt_PVIV);
3086 ptr = uiv_2buf(buf.arr, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
3087 len = ebuf - ptr;
3088 /* inlined from sv_setpvn */
3089 s = SvGROW_mutable(sv, len + 1);
3090 Move(ptr, s, len, char);
3091 s += len;
3092 *s = '\0';
3093 /* We used to call SvPOK_on(). Whilst this is fine for (most) Perl code,
3094 it means that after this stringification is cached, there is no way
3095 to distinguish between values originally assigned as $a = 42; and
3096 $a = "42"; (or results of string operators vs numeric operators)
3097 where the value has subsequently been used in the other sense
3098 and had a value cached.
3099 This (somewhat) hack means that we retain the cached stringification,
3100 but don't set SVf_POK. Hence if a value is SVf_IOK|SVf_POK then it
3101 originated as "42", whereas if it's SVf_IOK then it originated as 42.
3102 (ignore SVp_IOK and SVp_POK)
3103 The SvPV macros are now updated to recognise this specific case
3104 (and that there isn't overloading or magic that could alter the
3105 cached value) and so return the cached value immediately without
3106 re-entering this function, getting back here to this block of code,
3107 and repeating the same conversion. */
3108 SvPOKp_on(sv);
3109 }
3110 else if (SvNOK(sv)) {
3111 if (SvTYPE(sv) < SVt_PVNV)
3112 sv_upgrade(sv, SVt_PVNV);
3113 if (SvNVX(sv) == 0.0
3114 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3115 && !Perl_isnan(SvNVX(sv))
3116 #endif
3117 ) {
3118 s = SvGROW_mutable(sv, 2);
3119 *s++ = '0';
3120 *s = '\0';
3121 } else {
3122 STRLEN len;
3123 STRLEN size = 5; /* "-Inf\0" */
3124
3125 s = SvGROW_mutable(sv, size);
3126 len = S_infnan_2pv(SvNVX(sv), s, size, 0);
3127 if (len > 0) {
3128 s += len;
3129 SvPOKp_on(sv);
3130 }
3131 else {
3132 /* some Xenix systems wipe out errno here */
3133 dSAVE_ERRNO;
3134
3135 size =
3136 1 + /* sign */
3137 1 + /* "." */
3138 NV_DIG +
3139 1 + /* "e" */
3140 1 + /* sign */
3141 5 + /* exponent digits */
3142 1 + /* \0 */
3143 2; /* paranoia */
3144
3145 s = SvGROW_mutable(sv, size);
3146 #ifndef USE_LOCALE_NUMERIC
3147 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
3148
3149 SvPOKp_on(sv);
3150 #else
3151 {
3152 bool local_radix;
3153 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
3154 STORE_LC_NUMERIC_SET_TO_NEEDED();
3155
3156 local_radix = NOT_IN_NUMERIC_STANDARD_;
3157 if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) {
3158 size += SvCUR(PL_numeric_radix_sv) - 1;
3159 s = SvGROW_mutable(sv, size);
3160 }
3161
3162 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
3163
3164 /* If the radix character is UTF-8, and actually is in the
3165 * output, turn on the UTF-8 flag for the scalar */
3166 if ( local_radix
3167 && SvUTF8(PL_numeric_radix_sv)
3168 && instr(s, SvPVX_const(PL_numeric_radix_sv)))
3169 {
3170 SvUTF8_on(sv);
3171 }
3172
3173 RESTORE_LC_NUMERIC();
3174 }
3175
3176 /* We don't call SvPOK_on(), because it may come to
3177 * pass that the locale changes so that the
3178 * stringification we just did is no longer correct. We
3179 * will have to re-stringify every time it is needed */
3180 #endif
3181 RESTORE_ERRNO;
3182 }
3183 while (*s) s++;
3184 }
3185 }
3186 else if (isGV_with_GP(sv)) {
3187 GV *const gv = MUTABLE_GV(sv);
3188 SV *const buffer = sv_newmortal();
3189
3190 gv_efullname3(buffer, gv, "*");
3191
3192 assert(SvPOK(buffer));
3193 if (SvUTF8(buffer))
3194 SvUTF8_on(sv);
3195 else
3196 SvUTF8_off(sv);
3197 if (lp)
3198 *lp = SvCUR(buffer);
3199 return SvPVX(buffer);
3200 }
3201 else {
3202 if (lp)
3203 *lp = 0;
3204 if (flags & SV_UNDEF_RETURNS_NULL)
3205 return NULL;
3206 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
3207 report_uninit(sv);
3208 /* Typically the caller expects that sv_any is not NULL now. */
3209 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV)
3210 sv_upgrade(sv, SVt_PV);
3211 return (char *)"";
3212 }
3213
3214 {
3215 const STRLEN len = s - SvPVX_const(sv);
3216 if (lp)
3217 *lp = len;
3218 SvCUR_set(sv, len);
3219 }
3220 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n",
3221 PTR2UV(sv),SvPVX_const(sv)));
3222 if (flags & SV_CONST_RETURN)
3223 return (char *)SvPVX_const(sv);
3224 if (flags & SV_MUTABLE_RETURN)
3225 return SvPVX_mutable(sv);
3226 return SvPVX(sv);
3227 }
3228
3229 /*
3230 =for apidoc sv_copypv
3231 =for apidoc_item sv_copypv_flags
3232 =for apidoc_item sv_copypv_nomg
3233
3234 These copy a stringified representation of the source SV into the
3235 destination SV. They automatically perform coercion of numeric values into
3236 strings. Guaranteed to preserve the C<UTF8> flag even from overloaded objects.
3237 Similar in nature to C<sv_2pv[_flags]> but they operate directly on an SV
3238 instead of just the string. Mostly they use L</C<sv_2pv_flags>> to
3239 do the work, except when that would lose the UTF-8'ness of the PV.
3240
3241 The three forms differ only in whether or not they perform 'get magic' on
3242 C<sv>. C<sv_copypv_nomg> skips 'get magic'; C<sv_copypv> performs it; and
3243 C<sv_copypv_flags> either performs it (if the C<SV_GMAGIC> bit is set in
3244 C<flags>) or doesn't (if that bit is cleared).
3245
3246 =cut
3247 */
3248
3249 void
Perl_sv_copypv_flags(pTHX_ SV * const dsv,SV * const ssv,const I32 flags)3250 Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags)
3251 {
3252 STRLEN len;
3253 const char *s;
3254
3255 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS;
3256
3257 s = SvPV_flags_const(ssv,len,(flags & SV_GMAGIC));
3258 sv_setpvn(dsv,s,len);
3259 if (SvUTF8(ssv))
3260 SvUTF8_on(dsv);
3261 else
3262 SvUTF8_off(dsv);
3263 }
3264
3265 /*
3266 =for apidoc sv_2pvbyte
3267 =for apidoc_item sv_2pvbyte_flags
3268
3269 These implement the various forms of the L<perlapi/C<SvPVbyte>> macros.
3270 The macros are the preferred interface.
3271
3272 These return a pointer to the byte-encoded representation of the SV, and set
3273 C<*lp> to its length. If the SV is marked as being encoded as UTF-8, it will
3274 be downgraded, if possible, to a byte string. If the SV cannot be downgraded,
3275 they croak.
3276
3277 The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and
3278 C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains
3279 C<SV_GMAGIC>.
3280
3281 =for apidoc Amnh||SV_GMAGIC
3282
3283 =cut
3284 */
3285
3286 char *
Perl_sv_2pvbyte_flags(pTHX_ SV * sv,STRLEN * const lp,const U32 flags)3287 Perl_sv_2pvbyte_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
3288 {
3289 PERL_ARGS_ASSERT_SV_2PVBYTE_FLAGS;
3290
3291 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3292 mg_get(sv);
3293 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3294 || isGV_with_GP(sv) || SvROK(sv)) {
3295 SV *sv2 = sv_newmortal();
3296 sv_copypv_nomg(sv2,sv);
3297 sv = sv2;
3298 }
3299 sv_utf8_downgrade_nomg(sv,0);
3300 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
3301 }
3302
3303 /*
3304 =for apidoc sv_2pvutf8
3305 =for apidoc_item sv_2pvutf8_flags
3306
3307 These implement the various forms of the L<perlapi/C<SvPVutf8>> macros.
3308 The macros are the preferred interface.
3309
3310 These return a pointer to the UTF-8-encoded representation of the SV, and set
3311 C<*lp> to its length in bytes. They may cause the SV to be upgraded to UTF-8
3312 as a side-effect.
3313
3314 The forms differ in that plain C<sv_2pvutf8> always processes 'get' magic; and
3315 C<sv_2pvutf8_flags> processes 'get' magic if and only if C<flags> contains
3316 C<SV_GMAGIC>.
3317
3318 =cut
3319 */
3320
3321 char *
Perl_sv_2pvutf8_flags(pTHX_ SV * sv,STRLEN * const lp,const U32 flags)3322 Perl_sv_2pvutf8_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
3323 {
3324 PERL_ARGS_ASSERT_SV_2PVUTF8_FLAGS;
3325
3326 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3327 mg_get(sv);
3328 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3329 || isGV_with_GP(sv) || SvROK(sv)) {
3330 SV *sv2 = sv_newmortal();
3331 sv_copypv_nomg(sv2,sv);
3332 sv = sv2;
3333 }
3334 sv_utf8_upgrade_nomg(sv);
3335 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
3336 }
3337
3338
3339 /*
3340 =for apidoc sv_2bool
3341
3342 This macro is only used by C<sv_true()> or its macro equivalent, and only if
3343 the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>.
3344 It calls C<sv_2bool_flags> with the C<SV_GMAGIC> flag.
3345
3346 =for apidoc sv_2bool_flags
3347
3348 This function is only used by C<sv_true()> and friends, and only if
3349 the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. If the flags
3350 contain C<SV_GMAGIC>, then it does an C<mg_get()> first.
3351
3352
3353 =cut
3354 */
3355
3356 bool
Perl_sv_2bool_flags(pTHX_ SV * sv,I32 flags)3357 Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags)
3358 {
3359 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS;
3360
3361 restart:
3362 if(flags & SV_GMAGIC) SvGETMAGIC(sv);
3363
3364 if (!SvOK(sv))
3365 return 0;
3366 if (SvROK(sv)) {
3367 if (SvAMAGIC(sv)) {
3368 SV * const tmpsv = AMG_CALLunary(sv, bool__amg);
3369 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) {
3370 bool svb;
3371 sv = tmpsv;
3372 if(SvGMAGICAL(sv)) {
3373 flags = SV_GMAGIC;
3374 goto restart; /* call sv_2bool */
3375 }
3376 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */
3377 else if(!SvOK(sv)) {
3378 svb = 0;
3379 }
3380 else if(SvPOK(sv)) {
3381 svb = SvPVXtrue(sv);
3382 }
3383 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) {
3384 svb = (SvIOK(sv) && SvIVX(sv) != 0)
3385 || (SvNOK(sv) && SvNVX(sv) != 0.0);
3386 }
3387 else {
3388 flags = 0;
3389 goto restart; /* call sv_2bool_nomg */
3390 }
3391 return cBOOL(svb);
3392 }
3393 }
3394 assert(SvRV(sv));
3395 return TRUE;
3396 }
3397 if (isREGEXP(sv))
3398 return
3399 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0');
3400
3401 if (SvNOK(sv) && !SvPOK(sv))
3402 return SvNVX(sv) != 0.0;
3403
3404 return SvTRUE_common(sv, 0);
3405 }
3406
3407 /*
3408 =for apidoc sv_utf8_upgrade
3409 =for apidoc_item sv_utf8_upgrade_flags
3410 =for apidoc_item sv_utf8_upgrade_flags_grow
3411 =for apidoc_item sv_utf8_upgrade_nomg
3412
3413 These convert the PV of an SV to its UTF-8-encoded form.
3414 The SV is forced to string form if it is not already.
3415 They always set the C<SvUTF8> flag to avoid future validity checks even if the
3416 whole string is the same in UTF-8 as not.
3417 They return the number of bytes in the converted string
3418
3419 The forms differ in just two ways. The main difference is whether or not they
3420 perform 'get magic' on C<sv>. C<sv_utf8_upgrade_nomg> skips 'get magic';
3421 C<sv_utf8_upgrade> performs it; and C<sv_utf8_upgrade_flags> and
3422 C<sv_utf8_upgrade_flags_grow> either perform it (if the C<SV_GMAGIC> bit is set
3423 in C<flags>) or don't (if that bit is cleared).
3424
3425 The other difference is that C<sv_utf8_upgrade_flags_grow> has an additional
3426 parameter, C<extra>, which allows the caller to specify an amount of space to
3427 be reserved as spare beyond what is needed for the actual conversion. This is
3428 used when the caller knows it will soon be needing yet more space, and it is
3429 more efficient to request space from the system in a single call.
3430 This form is otherwise identical to C<sv_utf8_upgrade_flags>.
3431
3432 These are not a general purpose byte encoding to Unicode interface: use the
3433 Encode extension for that.
3434
3435 The C<SV_FORCE_UTF8_UPGRADE> flag is now ignored.
3436
3437 =for apidoc Amnh||SV_GMAGIC|
3438 =for apidoc Amnh||SV_FORCE_UTF8_UPGRADE|
3439
3440 =cut
3441
3442 If the routine itself changes the string, it adds a trailing C<NUL>. Such a
3443 C<NUL> isn't guaranteed due to having other routines do the work in some input
3444 cases, or if the input is already flagged as being in utf8.
3445
3446 */
3447
3448 STRLEN
Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV * const sv,const I32 flags,STRLEN extra)3449 Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra)
3450 {
3451 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
3452
3453 if (sv == &PL_sv_undef)
3454 return 0;
3455 if (!SvPOK_nog(sv)) {
3456 STRLEN len = 0;
3457 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3458 (void) sv_2pv_flags(sv,&len, flags);
3459 if (SvUTF8(sv)) {
3460 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3461 return len;
3462 }
3463 } else {
3464 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC);
3465 }
3466 }
3467
3468 /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already
3469 * compiled and individual nodes will remain non-utf8 even if the
3470 * stringified version of the pattern gets upgraded. Whether the
3471 * PVX of a REGEXP should be grown or we should just croak, I don't
3472 * know - DAPM */
3473 if (SvUTF8(sv) || isREGEXP(sv)) {
3474 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3475 return SvCUR(sv);
3476 }
3477
3478 if (SvIsCOW(sv)) {
3479 S_sv_uncow(aTHX_ sv, 0);
3480 }
3481
3482 if (SvCUR(sv) == 0) {
3483 if (extra) SvGROW(sv, extra + 1); /* Make sure is room for a trailing
3484 byte */
3485 } else { /* Assume Latin-1/EBCDIC */
3486 /* This function could be much more efficient if we
3487 * had a FLAG in SVs to signal if there are any variant
3488 * chars in the PV. Given that there isn't such a flag
3489 * make the loop as fast as possible. */
3490 U8 * s = (U8 *) SvPVX_const(sv);
3491 U8 *t = s;
3492
3493 if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
3494
3495 /* utf8 conversion not needed because all are invariants. Mark
3496 * as UTF-8 even if no variant - saves scanning loop */
3497 SvUTF8_on(sv);
3498 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3499 return SvCUR(sv);
3500 }
3501
3502 /* Here, there is at least one variant (t points to the first one), so
3503 * the string should be converted to utf8. Everything from 's' to
3504 * 't - 1' will occupy only 1 byte each on output.
3505 *
3506 * Note that the incoming SV may not have a trailing '\0', as certain
3507 * code in pp_formline can send us partially built SVs.
3508 *
3509 * There are two main ways to convert. One is to create a new string
3510 * and go through the input starting from the beginning, appending each
3511 * converted value onto the new string as we go along. Going this
3512 * route, it's probably best to initially allocate enough space in the
3513 * string rather than possibly running out of space and having to
3514 * reallocate and then copy what we've done so far. Since everything
3515 * from 's' to 't - 1' is invariant, the destination can be initialized
3516 * with these using a fast memory copy. To be sure to allocate enough
3517 * space, one could use the worst case scenario, where every remaining
3518 * byte expands to two under UTF-8, or one could parse it and count
3519 * exactly how many do expand.
3520 *
3521 * The other way is to unconditionally parse the remainder of the
3522 * string to figure out exactly how big the expanded string will be,
3523 * growing if needed. Then start at the end of the string and place
3524 * the character there at the end of the unfilled space in the expanded
3525 * one, working backwards until reaching 't'.
3526 *
3527 * The problem with assuming the worst case scenario is that for very
3528 * long strings, we could allocate much more memory than actually
3529 * needed, which can create performance problems. If we have to parse
3530 * anyway, the second method is the winner as it may avoid an extra
3531 * copy. The code used to use the first method under some
3532 * circumstances, but now that there is faster variant counting on
3533 * ASCII platforms, the second method is used exclusively, eliminating
3534 * some code that no longer has to be maintained. */
3535
3536 {
3537 /* Count the total number of variants there are. We can start
3538 * just beyond the first one, which is known to be at 't' */
3539 const Size_t invariant_length = t - s;
3540 U8 * e = (U8 *) SvEND(sv);
3541
3542 /* The length of the left overs, plus 1. */
3543 const Size_t remaining_length_p1 = e - t;
3544
3545 /* We expand by 1 for the variant at 't' and one for each remaining
3546 * variant (we start looking at 't+1') */
3547 Size_t expansion = 1 + variant_under_utf8_count(t + 1, e);
3548
3549 /* +1 = trailing NUL */
3550 Size_t need = SvCUR(sv) + expansion + extra + 1;
3551 U8 * d;
3552
3553 /* Grow if needed */
3554 if (SvLEN(sv) < need) {
3555 t = invariant_length + (U8*) SvGROW(sv, need);
3556 e = t + remaining_length_p1;
3557 }
3558 SvCUR_set(sv, invariant_length + remaining_length_p1 + expansion);
3559
3560 /* Set the NUL at the end */
3561 d = (U8 *) SvEND(sv);
3562 *d-- = '\0';
3563
3564 /* Having decremented d, it points to the position to put the
3565 * very last byte of the expanded string. Go backwards through
3566 * the string, copying and expanding as we go, stopping when we
3567 * get to the part that is invariant the rest of the way down */
3568
3569 e--;
3570 while (e >= t) {
3571 if (NATIVE_BYTE_IS_INVARIANT(*e)) {
3572 *d-- = *e;
3573 } else {
3574 *d-- = UTF8_EIGHT_BIT_LO(*e);
3575 *d-- = UTF8_EIGHT_BIT_HI(*e);
3576 }
3577 e--;
3578 }
3579
3580 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3581 /* Update pos. We do it at the end rather than during
3582 * the upgrade, to avoid slowing down the common case
3583 * (upgrade without pos).
3584 * pos can be stored as either bytes or characters. Since
3585 * this was previously a byte string we can just turn off
3586 * the bytes flag. */
3587 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
3588 if (mg) {
3589 mg->mg_flags &= ~MGf_BYTES;
3590 }
3591 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3592 magic_setutf8(sv,mg); /* clear UTF8 cache */
3593 }
3594 }
3595 }
3596
3597 SvUTF8_on(sv);
3598 return SvCUR(sv);
3599 }
3600
3601 /*
3602 =for apidoc sv_utf8_downgrade
3603 =for apidoc_item sv_utf8_downgrade_flags
3604 =for apidoc_item sv_utf8_downgrade_nomg
3605
3606 These attempt to convert the PV of an SV from characters to bytes. If the PV
3607 contains a character that cannot fit in a byte, this conversion will fail; in
3608 this case, C<FALSE> is returned if C<fail_ok> is true; otherwise they croak.
3609
3610 They are not a general purpose Unicode to byte encoding interface:
3611 use the C<Encode> extension for that.
3612
3613 They differ only in that:
3614
3615 C<sv_utf8_downgrade> processes 'get' magic on C<sv>.
3616
3617 C<sv_utf8_downgrade_nomg> does not.
3618
3619 C<sv_utf8_downgrade_flags> has an additional C<flags> parameter in which you can specify
3620 C<SV_GMAGIC> to process 'get' magic, or leave it cleared to not process 'get' magic.
3621
3622 =cut
3623 */
3624
3625 bool
Perl_sv_utf8_downgrade_flags(pTHX_ SV * const sv,const bool fail_ok,const U32 flags)3626 Perl_sv_utf8_downgrade_flags(pTHX_ SV *const sv, const bool fail_ok, const U32 flags)
3627 {
3628 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE_FLAGS;
3629
3630 if (SvPOKp(sv) && SvUTF8(sv)) {
3631 if (SvCUR(sv)) {
3632 U8 *s;
3633 STRLEN len;
3634 U32 mg_flags = flags & SV_GMAGIC;
3635
3636 if (SvIsCOW(sv)) {
3637 S_sv_uncow(aTHX_ sv, 0);
3638 }
3639 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3640 /* update pos */
3641 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
3642 if (mg && mg->mg_len > 0 && mg->mg_flags & MGf_BYTES) {
3643 mg->mg_len = sv_pos_b2u_flags(sv, mg->mg_len,
3644 mg_flags|SV_CONST_RETURN);
3645 mg_flags = 0; /* sv_pos_b2u does get magic */
3646 }
3647 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3648 magic_setutf8(sv,mg); /* clear UTF8 cache */
3649
3650 }
3651 s = (U8 *) SvPV_flags(sv, len, mg_flags);
3652
3653 if (!utf8_to_bytes(s, &len)) {
3654 if (fail_ok)
3655 return FALSE;
3656 else {
3657 if (PL_op)
3658 Perl_croak(aTHX_ "Wide character in %s",
3659 OP_DESC(PL_op));
3660 else
3661 Perl_croak(aTHX_ "Wide character");
3662 }
3663 }
3664 SvCUR_set(sv, len);
3665 }
3666 }
3667 SvUTF8_off(sv);
3668 return TRUE;
3669 }
3670
3671 /*
3672 =for apidoc sv_utf8_encode
3673
3674 Converts the PV of an SV to UTF-8, but then turns the C<SvUTF8>
3675 flag off so that it looks like octets again.
3676
3677 =cut
3678 */
3679
3680 void
Perl_sv_utf8_encode(pTHX_ SV * const sv)3681 Perl_sv_utf8_encode(pTHX_ SV *const sv)
3682 {
3683 PERL_ARGS_ASSERT_SV_UTF8_ENCODE;
3684
3685 if (SvREADONLY(sv)) {
3686 sv_force_normal_flags(sv, 0);
3687 }
3688 (void) sv_utf8_upgrade(sv);
3689 SvUTF8_off(sv);
3690 }
3691
3692 /*
3693 =for apidoc sv_utf8_decode
3694
3695 If the PV of the SV is an octet sequence in Perl's extended UTF-8
3696 and contains a multiple-byte character, the C<SvUTF8> flag is turned on
3697 so that it looks like a character. If the PV contains only single-byte
3698 characters, the C<SvUTF8> flag stays off.
3699 Scans PV for validity and returns FALSE if the PV is invalid UTF-8.
3700
3701 =cut
3702 */
3703
3704 bool
Perl_sv_utf8_decode(pTHX_ SV * const sv)3705 Perl_sv_utf8_decode(pTHX_ SV *const sv)
3706 {
3707 PERL_ARGS_ASSERT_SV_UTF8_DECODE;
3708
3709 if (SvPOKp(sv)) {
3710 const U8 *start, *c, *first_variant;
3711
3712 /* The octets may have got themselves encoded - get them back as
3713 * bytes
3714 */
3715 if (!sv_utf8_downgrade(sv, TRUE))
3716 return FALSE;
3717
3718 /* it is actually just a matter of turning the utf8 flag on, but
3719 * we want to make sure everything inside is valid utf8 first.
3720 */
3721 c = start = (const U8 *) SvPVX_const(sv);
3722 if (! is_utf8_invariant_string_loc(c, SvCUR(sv), &first_variant)) {
3723 if (!is_utf8_string(first_variant, SvCUR(sv) - (first_variant -c)))
3724 return FALSE;
3725 SvUTF8_on(sv);
3726 }
3727 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3728 /* XXX Is this dead code? XS_utf8_decode calls SvSETMAGIC
3729 after this, clearing pos. Does anything on CPAN
3730 need this? */
3731 /* adjust pos to the start of a UTF8 char sequence */
3732 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
3733 if (mg) {
3734 SSize_t pos = mg->mg_len;
3735 if (pos > 0) {
3736 for (c = start + pos; c > start; c--) {
3737 if (UTF8_IS_START(*c))
3738 break;
3739 }
3740 mg->mg_len = c - start;
3741 }
3742 }
3743 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3744 magic_setutf8(sv,mg); /* clear UTF8 cache */
3745 }
3746 }
3747 return TRUE;
3748 }
3749
3750 /*
3751 =for apidoc sv_setsv
3752 =for apidoc_item sv_setsv_flags
3753 =for apidoc_item sv_setsv_mg
3754 =for apidoc_item sv_setsv_nomg
3755
3756 These copy the contents of the source SV C<ssv> into the destination SV C<dsv>.
3757 C<ssv> may be destroyed if it is mortal, so don't use these functions if
3758 the source SV needs to be reused.
3759 Loosely speaking, they perform a copy-by-value, obliterating any previous
3760 content of the destination.
3761
3762 They differ only in that:
3763
3764 C<sv_setsv> calls 'get' magic on C<ssv>, but skips 'set' magic on C<dsv>.
3765
3766 C<sv_setsv_mg> calls both 'get' magic on C<ssv> and 'set' magic on C<dsv>.
3767
3768 C<sv_setsv_nomg> skips all magic.
3769
3770 C<sv_setsv_flags> has a C<flags> parameter which you can use to specify any
3771 combination of magic handling, and also you can specify C<SV_NOSTEAL> so that
3772 the buffers of temps will not be stolen.
3773
3774 You probably want to instead use one of the assortment of wrappers, such as
3775 C<L</SvSetSV>>, C<L</SvSetSV_nosteal>>, C<L</SvSetMagicSV>> and
3776 C<L</SvSetMagicSV_nosteal>>.
3777
3778 C<sv_setsv_flags> is the primary function for copying scalars, and most other
3779 copy-ish functions and macros use it underneath.
3780
3781 =for apidoc Amnh||SV_NOSTEAL
3782
3783 =cut
3784 */
3785
3786 static void
S_glob_assign_glob(pTHX_ SV * const dsv,SV * const ssv,const int dtype)3787 S_glob_assign_glob(pTHX_ SV *const dsv, SV *const ssv, const int dtype)
3788 {
3789 I32 mro_changes = 0; /* 1 = method, 2 = isa, 3 = recursive isa */
3790 HV *old_stash = NULL;
3791
3792 PERL_ARGS_ASSERT_GLOB_ASSIGN_GLOB;
3793
3794 if (dtype != SVt_PVGV && !isGV_with_GP(dsv)) {
3795 const char * const name = GvNAME(ssv);
3796 const STRLEN len = GvNAMELEN(ssv);
3797 {
3798 if (dtype >= SVt_PV) {
3799 SvPV_free(dsv);
3800 SvPV_set(dsv, 0);
3801 SvLEN_set(dsv, 0);
3802 SvCUR_set(dsv, 0);
3803 }
3804 SvUPGRADE(dsv, SVt_PVGV);
3805 (void)SvOK_off(dsv);
3806 isGV_with_GP_on(dsv);
3807 }
3808 GvSTASH(dsv) = GvSTASH(ssv);
3809 if (GvSTASH(dsv))
3810 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dsv)), dsv);
3811 gv_name_set(MUTABLE_GV(dsv), name, len,
3812 GV_ADD | (GvNAMEUTF8(ssv) ? SVf_UTF8 : 0 ));
3813 SvFAKE_on(dsv); /* can coerce to non-glob */
3814 }
3815
3816 if(GvGP(MUTABLE_GV(ssv))) {
3817 /* If source has method cache entry, clear it */
3818 if(GvCVGEN(ssv)) {
3819 SvREFCNT_dec(GvCV(ssv));
3820 GvCV_set(ssv, NULL);
3821 GvCVGEN(ssv) = 0;
3822 }
3823 /* If source has a real method, then a method is
3824 going to change */
3825 else if(
3826 GvCV((const GV *)ssv) && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv))
3827 ) {
3828 mro_changes = 1;
3829 }
3830 }
3831
3832 /* If dest already had a real method, that's a change as well */
3833 if(
3834 !mro_changes && GvGP(MUTABLE_GV(dsv)) && GvCVu((const GV *)dsv)
3835 && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv))
3836 ) {
3837 mro_changes = 1;
3838 }
3839
3840 /* We don't need to check the name of the destination if it was not a
3841 glob to begin with. */
3842 if(dtype == SVt_PVGV) {
3843 const char * const name = GvNAME((const GV *)dsv);
3844 const STRLEN len = GvNAMELEN(dsv);
3845 if(memEQs(name, len, "ISA")
3846 /* The stash may have been detached from the symbol table, so
3847 check its name. */
3848 && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv))
3849 )
3850 mro_changes = 2;
3851 else {
3852 if ((len > 1 && name[len-2] == ':' && name[len-1] == ':')
3853 || (len == 1 && name[0] == ':')) {
3854 mro_changes = 3;
3855
3856 /* Set aside the old stash, so we can reset isa caches on
3857 its subclasses. */
3858 if((old_stash = GvHV(dsv)))
3859 /* Make sure we do not lose it early. */
3860 SvREFCNT_inc_simple_void_NN(
3861 sv_2mortal((SV *)old_stash)
3862 );
3863 }
3864 }
3865
3866 SvREFCNT_inc_simple_void_NN(sv_2mortal(dsv));
3867 }
3868
3869 /* freeing dsv's GP might free ssv (e.g. *x = $x),
3870 * so temporarily protect it */
3871 ENTER;
3872 SAVEFREESV(SvREFCNT_inc_simple_NN(ssv));
3873 gp_free(MUTABLE_GV(dsv));
3874 GvINTRO_off(dsv); /* one-shot flag */
3875 GvGP_set(dsv, gp_ref(GvGP(ssv)));
3876 LEAVE;
3877
3878 if (SvTAINTED(ssv))
3879 SvTAINT(dsv);
3880 if (GvIMPORTED(dsv) != GVf_IMPORTED
3881 && CopSTASH_ne(PL_curcop, GvSTASH(dsv)))
3882 {
3883 GvIMPORTED_on(dsv);
3884 }
3885 GvMULTI_on(dsv);
3886 if(mro_changes == 2) {
3887 if (GvAV((const GV *)ssv)) {
3888 MAGIC *mg;
3889 SV * const sref = (SV *)GvAV((const GV *)dsv);
3890 if (SvSMAGICAL(sref) && (mg = mg_find(sref, PERL_MAGIC_isa))) {
3891 if (SvTYPE(mg->mg_obj) != SVt_PVAV) {
3892 AV * const ary = newAV_alloc_x(2);
3893 av_push_simple(ary, mg->mg_obj); /* takes the refcount */
3894 av_push_simple(ary, SvREFCNT_inc_simple_NN(dsv));
3895 mg->mg_obj = (SV *)ary;
3896 } else {
3897 av_push((AV *)mg->mg_obj, SvREFCNT_inc_simple_NN(dsv));
3898 }
3899 }
3900 else sv_magic(sref, dsv, PERL_MAGIC_isa, NULL, 0);
3901 }
3902 mro_isa_changed_in(GvSTASH(dsv));
3903 }
3904 else if(mro_changes == 3) {
3905 HV * const stash = GvHV(dsv);
3906 if(old_stash ? HvHasENAME(old_stash) : cBOOL(stash))
3907 mro_package_moved(
3908 stash, old_stash,
3909 (GV *)dsv, 0
3910 );
3911 }
3912 else if(mro_changes) mro_method_changed_in(GvSTASH(dsv));
3913 if (GvIO(dsv) && dtype == SVt_PVGV) {
3914 DEBUG_o(Perl_deb(aTHX_
3915 "glob_assign_glob clearing PL_stashcache\n"));
3916 /* It's a cache. It will rebuild itself quite happily.
3917 It's a lot of effort to work out exactly which key (or keys)
3918 might be invalidated by the creation of the this file handle.
3919 */
3920 hv_clear(PL_stashcache);
3921 }
3922 return;
3923 }
3924
3925 void
Perl_gv_setref(pTHX_ SV * const dsv,SV * const ssv)3926 Perl_gv_setref(pTHX_ SV *const dsv, SV *const ssv)
3927 {
3928 SV * const sref = SvRV(ssv);
3929 SV *dref;
3930 const int intro = GvINTRO(dsv);
3931 SV **location;
3932 U8 import_flag = 0;
3933 const U32 stype = SvTYPE(sref);
3934
3935 PERL_ARGS_ASSERT_GV_SETREF;
3936
3937 if (intro) {
3938 GvINTRO_off(dsv); /* one-shot flag */
3939 GvLINE(dsv) = CopLINE(PL_curcop);
3940 GvEGV(dsv) = MUTABLE_GV(dsv);
3941 }
3942 GvMULTI_on(dsv);
3943 switch (stype) {
3944 case SVt_PVCV:
3945 location = (SV **) &(GvGP(dsv)->gp_cv); /* XXX bypassing GvCV_set */
3946 import_flag = GVf_IMPORTED_CV;
3947 goto common;
3948 case SVt_PVHV:
3949 location = (SV **) &GvHV(dsv);
3950 import_flag = GVf_IMPORTED_HV;
3951 goto common;
3952 case SVt_PVAV:
3953 location = (SV **) &GvAV(dsv);
3954 import_flag = GVf_IMPORTED_AV;
3955 goto common;
3956 case SVt_PVIO:
3957 location = (SV **) &GvIOp(dsv);
3958 goto common;
3959 case SVt_PVFM:
3960 location = (SV **) &GvFORM(dsv);
3961 goto common;
3962 default:
3963 location = &GvSV(dsv);
3964 import_flag = GVf_IMPORTED_SV;
3965 common:
3966 if (intro) {
3967 if (stype == SVt_PVCV) {
3968 /*if (GvCVGEN(dsv) && (GvCV(dsv) != (const CV *)sref || GvCVGEN(dsv))) {*/
3969 if (GvCVGEN(dsv)) {
3970 SvREFCNT_dec(GvCV(dsv));
3971 GvCV_set(dsv, NULL);
3972 GvCVGEN(dsv) = 0; /* Switch off cacheness. */
3973 }
3974 }
3975 /* SAVEt_GVSLOT takes more room on the savestack and has more
3976 overhead in leave_scope than SAVEt_GENERIC_SV. But for CVs
3977 leave_scope needs access to the GV so it can reset method
3978 caches. We must use SAVEt_GVSLOT whenever the type is
3979 SVt_PVCV, even if the stash is anonymous, as the stash may
3980 gain a name somehow before leave_scope. */
3981 if (stype == SVt_PVCV) {
3982 /* There is no save_pushptrptrptr. Creating it for this
3983 one call site would be overkill. So inline the ss add
3984 routines here. */
3985 dSS_ADD;
3986 SS_ADD_PTR(dsv);
3987 SS_ADD_PTR(location);
3988 SS_ADD_PTR(SvREFCNT_inc(*location));
3989 SS_ADD_UV(SAVEt_GVSLOT);
3990 SS_ADD_END(4);
3991 }
3992 else SAVEGENERICSV(*location);
3993 }
3994 dref = *location;
3995 if (stype == SVt_PVCV && (*location != sref || GvCVGEN(dsv))) {
3996 CV* const cv = MUTABLE_CV(*location);
3997 if (cv) {
3998 if (!GvCVGEN((const GV *)dsv) &&
3999 (CvROOT(cv) || CvXSUB(cv)) &&
4000 /* redundant check that avoids creating the extra SV
4001 most of the time: */
4002 (CvCONST(cv) || (ckWARN(WARN_REDEFINE) && !intro)))
4003 {
4004 SV * const new_const_sv =
4005 CvCONST((const CV *)sref)
4006 ? cv_const_sv_or_av((const CV *)sref)
4007 : NULL;
4008 HV * const stash = GvSTASH((const GV *)dsv);
4009 report_redefined_cv(
4010 sv_2mortal(
4011 stash
4012 ? Perl_newSVpvf(aTHX_
4013 "%" HEKf "::%" HEKf,
4014 HEKfARG(HvNAME_HEK(stash)),
4015 HEKfARG(GvENAME_HEK(MUTABLE_GV(dsv))))
4016 : Perl_newSVpvf(aTHX_
4017 "%" HEKf,
4018 HEKfARG(GvENAME_HEK(MUTABLE_GV(dsv))))
4019 ),
4020 cv,
4021 CvCONST((const CV *)sref) ? &new_const_sv : NULL
4022 );
4023 }
4024 if (!intro)
4025 cv_ckproto_len_flags(cv, (const GV *)dsv,
4026 SvPOK(sref) ? CvPROTO(sref) : NULL,
4027 SvPOK(sref) ? CvPROTOLEN(sref) : 0,
4028 SvPOK(sref) ? SvUTF8(sref) : 0);
4029 }
4030 GvCVGEN(dsv) = 0; /* Switch off cacheness. */
4031 GvASSUMECV_on(dsv);
4032 if(GvSTASH(dsv)) { /* sub foo { 1 } sub bar { 2 } *bar = \&foo */
4033 if (intro && GvREFCNT(dsv) > 1) {
4034 /* temporary remove extra savestack's ref */
4035 --GvREFCNT(dsv);
4036 gv_method_changed(dsv);
4037 ++GvREFCNT(dsv);
4038 }
4039 else gv_method_changed(dsv);
4040 }
4041 }
4042 *location = SvREFCNT_inc_simple_NN(sref);
4043 if (import_flag && !(GvFLAGS(dsv) & import_flag)
4044 && CopSTASH_ne(PL_curcop, GvSTASH(dsv))) {
4045 GvFLAGS(dsv) |= import_flag;
4046 }
4047
4048 if (stype == SVt_PVHV) {
4049 const char * const name = GvNAME((GV*)dsv);
4050 const STRLEN len = GvNAMELEN(dsv);
4051 if (
4052 (
4053 (len > 1 && name[len-2] == ':' && name[len-1] == ':')
4054 || (len == 1 && name[0] == ':')
4055 )
4056 && (!dref || HvHasENAME(dref))
4057 ) {
4058 mro_package_moved(
4059 (HV *)sref, (HV *)dref,
4060 (GV *)dsv, 0
4061 );
4062 }
4063 }
4064 else if (
4065 stype == SVt_PVAV && sref != dref
4066 && memEQs(GvNAME((GV*)dsv), GvNAMELEN((GV*)dsv), "ISA")
4067 /* The stash may have been detached from the symbol table, so
4068 check its name before doing anything. */
4069 && GvSTASH(dsv) && HvHasENAME(GvSTASH(dsv))
4070 ) {
4071 MAGIC *mg;
4072 MAGIC * const omg = dref && SvSMAGICAL(dref)
4073 ? mg_find(dref, PERL_MAGIC_isa)
4074 : NULL;
4075 if (SvSMAGICAL(sref) && (mg = mg_find(sref, PERL_MAGIC_isa))) {
4076 if (SvTYPE(mg->mg_obj) != SVt_PVAV) {
4077 AV * const ary = newAV_alloc_xz(4);
4078 av_push_simple(ary, mg->mg_obj); /* takes the refcount */
4079 mg->mg_obj = (SV *)ary;
4080 }
4081 if (omg) {
4082 if (SvTYPE(omg->mg_obj) == SVt_PVAV) {
4083 SV **svp = AvARRAY((AV *)omg->mg_obj);
4084 I32 items = AvFILLp((AV *)omg->mg_obj) + 1;
4085 while (items--)
4086 av_push(
4087 (AV *)mg->mg_obj,
4088 SvREFCNT_inc_simple_NN(*svp++)
4089 );
4090 }
4091 else
4092 av_push(
4093 (AV *)mg->mg_obj,
4094 SvREFCNT_inc_simple_NN(omg->mg_obj)
4095 );
4096 }
4097 else
4098 av_push((AV *)mg->mg_obj,SvREFCNT_inc_simple_NN(dsv));
4099 }
4100 else
4101 {
4102 SSize_t i;
4103 sv_magic(
4104 sref, omg ? omg->mg_obj : dsv, PERL_MAGIC_isa, NULL, 0
4105 );
4106 for (i = 0; i <= AvFILL(sref); ++i) {
4107 SV **elem = av_fetch ((AV*)sref, i, 0);
4108 if (elem) {
4109 sv_magic(
4110 *elem, sref, PERL_MAGIC_isaelem, NULL, i
4111 );
4112 }
4113 }
4114 mg = mg_find(sref, PERL_MAGIC_isa);
4115 }
4116 /* Since the *ISA assignment could have affected more than
4117 one stash, don't call mro_isa_changed_in directly, but let
4118 magic_clearisa do it for us, as it already has the logic for
4119 dealing with globs vs arrays of globs. */
4120 assert(mg);
4121 Perl_magic_clearisa(aTHX_ NULL, mg);
4122 }
4123 else if (stype == SVt_PVIO) {
4124 DEBUG_o(Perl_deb(aTHX_ "gv_setref clearing PL_stashcache\n"));
4125 /* It's a cache. It will rebuild itself quite happily.
4126 It's a lot of effort to work out exactly which key (or keys)
4127 might be invalidated by the creation of the this file handle.
4128 */
4129 hv_clear(PL_stashcache);
4130 }
4131 break;
4132 }
4133 if (!intro) SvREFCNT_dec(dref);
4134 if (SvTAINTED(ssv))
4135 SvTAINT(dsv);
4136 return;
4137 }
4138
4139
4140
4141
4142 #ifdef PERL_DEBUG_READONLY_COW
4143 # include <sys/mman.h>
4144
4145 # ifndef PERL_MEMORY_DEBUG_HEADER_SIZE
4146 # define PERL_MEMORY_DEBUG_HEADER_SIZE 0
4147 # endif
4148
4149 void
Perl_sv_buf_to_ro(pTHX_ SV * sv)4150 Perl_sv_buf_to_ro(pTHX_ SV *sv)
4151 {
4152 struct perl_memory_debug_header * const header =
4153 (struct perl_memory_debug_header *)(SvPVX(sv)-PERL_MEMORY_DEBUG_HEADER_SIZE);
4154 const MEM_SIZE len = header->size;
4155 PERL_ARGS_ASSERT_SV_BUF_TO_RO;
4156 # ifdef PERL_TRACK_MEMPOOL
4157 if (!header->readonly) header->readonly = 1;
4158 # endif
4159 if (mprotect(header, len, PROT_READ))
4160 Perl_warn(aTHX_ "mprotect RW for COW string %p %lu failed with %d",
4161 header, len, errno);
4162 }
4163
4164 static void
S_sv_buf_to_rw(pTHX_ SV * sv)4165 S_sv_buf_to_rw(pTHX_ SV *sv)
4166 {
4167 struct perl_memory_debug_header * const header =
4168 (struct perl_memory_debug_header *)(SvPVX(sv)-PERL_MEMORY_DEBUG_HEADER_SIZE);
4169 const MEM_SIZE len = header->size;
4170 PERL_ARGS_ASSERT_SV_BUF_TO_RW;
4171 if (mprotect(header, len, PROT_READ|PROT_WRITE))
4172 Perl_warn(aTHX_ "mprotect for COW string %p %lu failed with %d",
4173 header, len, errno);
4174 # ifdef PERL_TRACK_MEMPOOL
4175 header->readonly = 0;
4176 # endif
4177 }
4178
4179 #else
4180 # define sv_buf_to_ro(sv) NOOP
4181 # define sv_buf_to_rw(sv) NOOP
4182 #endif
4183
4184 void
Perl_sv_setsv_flags(pTHX_ SV * dsv,SV * ssv,const I32 flags)4185 Perl_sv_setsv_flags(pTHX_ SV *dsv, SV* ssv, const I32 flags)
4186 {
4187 U32 sflags;
4188 int dtype;
4189 svtype stype;
4190 unsigned int both_type;
4191
4192 PERL_ARGS_ASSERT_SV_SETSV_FLAGS;
4193
4194 if (UNLIKELY( ssv == dsv ))
4195 return;
4196
4197 if (UNLIKELY( !ssv ))
4198 ssv = &PL_sv_undef;
4199
4200 stype = SvTYPE(ssv);
4201 dtype = SvTYPE(dsv);
4202 both_type = (stype | dtype);
4203
4204 /* with these values, we can check that both SVs are NULL/IV (and not
4205 * freed) just by testing the or'ed types */
4206 STATIC_ASSERT_STMT(SVt_NULL == 0);
4207 STATIC_ASSERT_STMT(SVt_IV == 1);
4208 STATIC_ASSERT_STMT(SVt_NV == 2);
4209 #if NVSIZE <= IVSIZE
4210 if (both_type <= 2) {
4211 #else
4212 if (both_type <= 1) {
4213 #endif
4214 /* both src and dst are UNDEF/IV/RV - maybe NV depending on config,
4215 * so we can do a lot of special-casing */
4216 U32 sflags;
4217 U32 new_dflags;
4218 SV *old_rv = NULL;
4219
4220 /* minimal subset of SV_CHECK_THINKFIRST_COW_DROP(dsv) */
4221 if (SvREADONLY(dsv))
4222 Perl_croak_no_modify();
4223 if (SvROK(dsv)) {
4224 if (SvWEAKREF(dsv))
4225 sv_unref_flags(dsv, 0);
4226 else
4227 old_rv = SvRV(dsv);
4228 SvROK_off(dsv);
4229 }
4230
4231 assert(!SvGMAGICAL(ssv));
4232 assert(!SvGMAGICAL(dsv));
4233
4234 sflags = SvFLAGS(ssv);
4235 if (sflags & (SVf_IOK|SVf_ROK)) {
4236 SET_SVANY_FOR_BODYLESS_IV(dsv);
4237 new_dflags = SVt_IV;
4238
4239 if (sflags & SVf_ROK) {
4240 dsv->sv_u.svu_rv = SvREFCNT_inc(SvRV(ssv));
4241 new_dflags |= SVf_ROK;
4242 }
4243 else {
4244 /* both src and dst are <= SVt_IV, so sv_any points to the
4245 * head; so access the head directly
4246 */
4247 assert( &(ssv->sv_u.svu_iv)
4248 == &(((XPVIV*) SvANY(ssv))->xiv_iv));
4249 assert( &(dsv->sv_u.svu_iv)
4250 == &(((XPVIV*) SvANY(dsv))->xiv_iv));
4251 dsv->sv_u.svu_iv = ssv->sv_u.svu_iv;
4252 new_dflags |= (SVf_IOK|SVp_IOK|(sflags & SVf_IVisUV));
4253 }
4254 }
4255 #if NVSIZE <= IVSIZE
4256 else if (sflags & SVf_NOK) {
4257 SET_SVANY_FOR_BODYLESS_NV(dsv);
4258 new_dflags = (SVt_NV|SVf_NOK|SVp_NOK);
4259
4260 /* both src and dst are <= SVt_MV, so sv_any points to the
4261 * head; so access the head directly
4262 */
4263 assert( &(ssv->sv_u.svu_nv)
4264 == &(((XPVNV*) SvANY(ssv))->xnv_u.xnv_nv));
4265 assert( &(dsv->sv_u.svu_nv)
4266 == &(((XPVNV*) SvANY(dsv))->xnv_u.xnv_nv));
4267 dsv->sv_u.svu_nv = ssv->sv_u.svu_nv;
4268 }
4269 #endif
4270 else {
4271 new_dflags = dtype; /* turn off everything except the type */
4272 }
4273 /* Should preserve some dsv flags - at least SVs_TEMP, */
4274 /* so cannot just set SvFLAGS(dsv) = new_dflags */
4275 /* First clear the flags that we do want to clobber */
4276 (void)SvOK_off(dsv);
4277 SvFLAGS(dsv) &= ~SVTYPEMASK;
4278 /* Now set the new flags */
4279 SvFLAGS(dsv) |= new_dflags;
4280
4281 SvREFCNT_dec(old_rv);
4282 return;
4283 }
4284
4285 if (UNLIKELY(both_type == SVTYPEMASK)) {
4286 if (SvIS_FREED(dsv)) {
4287 Perl_croak(aTHX_ "panic: attempt to copy value %" SVf
4288 " to a freed scalar %p", SVfARG(ssv), (void *)dsv);
4289 }
4290 if (SvIS_FREED(ssv)) {
4291 Perl_croak(aTHX_ "panic: attempt to copy freed scalar %p to %p",
4292 (void*)ssv, (void*)dsv);
4293 }
4294 }
4295
4296
4297
4298 SV_CHECK_THINKFIRST_COW_DROP(dsv);
4299 dtype = SvTYPE(dsv); /* THINKFIRST may have changed type */
4300
4301 /* There's a lot of redundancy below but we're going for speed here
4302 * Note: some of the cases below do return; rather than break; so the
4303 * if-elseif-else logic below this switch does not see all cases. */
4304
4305 switch (stype) {
4306 case SVt_NULL:
4307 undef_sstr:
4308 if (LIKELY( dtype != SVt_PVGV && dtype != SVt_PVLV )) {
4309 (void)SvOK_off(dsv);
4310 return;
4311 }
4312 break;
4313 case SVt_IV:
4314 if (SvIOK(ssv)) {
4315 switch (dtype) {
4316 case SVt_NULL:
4317 /* For performance, we inline promoting to type SVt_IV. */
4318 /* We're starting from SVt_NULL, so provided that define is
4319 * actual 0, we don't have to unset any SV type flags
4320 * to promote to SVt_IV. */
4321 STATIC_ASSERT_STMT(SVt_NULL == 0);
4322 SET_SVANY_FOR_BODYLESS_IV(dsv);
4323 SvFLAGS(dsv) |= SVt_IV;
4324 break;
4325 case SVt_NV:
4326 case SVt_PV:
4327 sv_upgrade(dsv, SVt_PVIV);
4328 break;
4329 case SVt_PVGV:
4330 case SVt_PVLV:
4331 goto end_of_first_switch;
4332 }
4333 (void)SvIOK_only(dsv);
4334 SvIV_set(dsv, SvIVX(ssv));
4335 if (SvIsUV(ssv))
4336 SvIsUV_on(dsv);
4337 /* SvTAINTED can only be true if the SV has taint magic, which in
4338 turn means that the SV type is PVMG (or greater). This is the
4339 case statement for SVt_IV, so this cannot be true (whatever gcov
4340 may say). */
4341 assert(!SvTAINTED(ssv));
4342 return;
4343 }
4344 if (!SvROK(ssv))
4345 goto undef_sstr;
4346 if (dtype < SVt_PV && dtype != SVt_IV)
4347 sv_upgrade(dsv, SVt_IV);
4348 break;
4349
4350 case SVt_NV:
4351 if (LIKELY( SvNOK(ssv) )) {
4352 switch (dtype) {
4353 case SVt_NULL:
4354 case SVt_IV:
4355 sv_upgrade(dsv, SVt_NV);
4356 break;
4357 case SVt_PV:
4358 case SVt_PVIV:
4359 sv_upgrade(dsv, SVt_PVNV);
4360 break;
4361 case SVt_PVGV:
4362 case SVt_PVLV:
4363 goto end_of_first_switch;
4364 }
4365 SvNV_set(dsv, SvNVX(ssv));
4366 (void)SvNOK_only(dsv);
4367 /* SvTAINTED can only be true if the SV has taint magic, which in
4368 turn means that the SV type is PVMG (or greater). This is the
4369 case statement for SVt_NV, so this cannot be true (whatever gcov
4370 may say). */
4371 assert(!SvTAINTED(ssv));
4372 return;
4373 }
4374 goto undef_sstr;
4375
4376 case SVt_PV:
4377 if (dtype < SVt_PV)
4378 sv_upgrade(dsv, SVt_PV);
4379 break;
4380 case SVt_PVIV:
4381 if (dtype < SVt_PVIV)
4382 sv_upgrade(dsv, SVt_PVIV);
4383 break;
4384 case SVt_PVNV:
4385 if (dtype < SVt_PVNV)
4386 sv_upgrade(dsv, SVt_PVNV);
4387 break;
4388
4389 case SVt_INVLIST:
4390 invlist_clone(ssv, dsv);
4391 return;
4392 default:
4393 {
4394 const char * const type = sv_reftype(ssv,0);
4395 if (PL_op)
4396 /* diag_listed_as: Bizarre copy of %s */
4397 Perl_croak(aTHX_ "Bizarre copy of %s in %s", type, OP_DESC(PL_op));
4398 else
4399 Perl_croak(aTHX_ "Bizarre copy of %s", type);
4400 }
4401 NOT_REACHED; /* NOTREACHED */
4402
4403 case SVt_REGEXP:
4404 upgregexp:
4405 if (dtype < SVt_REGEXP)
4406 sv_upgrade(dsv, SVt_REGEXP);
4407 break;
4408
4409 case SVt_PVLV:
4410 case SVt_PVGV:
4411 case SVt_PVMG:
4412 if (SvGMAGICAL(ssv) && (flags & SV_GMAGIC)) {
4413 mg_get(ssv);
4414 if (SvTYPE(ssv) != stype)
4415 stype = SvTYPE(ssv);
4416 }
4417 if (isGV_with_GP(ssv) && dtype <= SVt_PVLV) {
4418 glob_assign_glob(dsv, ssv, dtype);
4419 return;
4420 }
4421 if (stype == SVt_PVLV)
4422 {
4423 if (isREGEXP(ssv)) goto upgregexp;
4424 SvUPGRADE(dsv, SVt_PVNV);
4425 }
4426 else
4427 SvUPGRADE(dsv, (svtype)stype);
4428 }
4429 end_of_first_switch:
4430
4431 /* dsv may have been upgraded. */
4432 dtype = SvTYPE(dsv);
4433 sflags = SvFLAGS(ssv);
4434
4435 if (UNLIKELY( dtype == SVt_PVCV )) {
4436 /* Assigning to a subroutine sets the prototype. */
4437 if (SvOK(ssv)) {
4438 STRLEN len;
4439 const char *const ptr = SvPV_const(ssv, len);
4440
4441 SvGROW(dsv, len + 1);
4442 Copy(ptr, SvPVX(dsv), len + 1, char);
4443 SvCUR_set(dsv, len);
4444 SvPOK_only(dsv);
4445 SvFLAGS(dsv) |= sflags & SVf_UTF8;
4446 CvAUTOLOAD_off(dsv);
4447 } else {
4448 SvOK_off(dsv);
4449 }
4450 }
4451 else if (UNLIKELY(dtype == SVt_PVAV || dtype == SVt_PVHV
4452 || dtype == SVt_PVFM))
4453 {
4454 const char * const type = sv_reftype(dsv,0);
4455 if (PL_op)
4456 /* diag_listed_as: Cannot copy to %s */
4457 Perl_croak(aTHX_ "Cannot copy to %s in %s", type, OP_DESC(PL_op));
4458 else
4459 Perl_croak(aTHX_ "Cannot copy to %s", type);
4460 } else if (sflags & SVf_ROK) {
4461 if (isGV_with_GP(dsv)
4462 && SvTYPE(SvRV(ssv)) == SVt_PVGV && isGV_with_GP(SvRV(ssv))) {
4463 ssv = SvRV(ssv);
4464 if (ssv == dsv) {
4465 if (GvIMPORTED(dsv) != GVf_IMPORTED
4466 && CopSTASH_ne(PL_curcop, GvSTASH(dsv)))
4467 {
4468 GvIMPORTED_on(dsv);
4469 }
4470 GvMULTI_on(dsv);
4471 return;
4472 }
4473 glob_assign_glob(dsv, ssv, dtype);
4474 return;
4475 }
4476
4477 if (dtype >= SVt_PV) {
4478 if (isGV_with_GP(dsv)) {
4479 gv_setref(dsv, ssv);
4480 return;
4481 }
4482 if (SvPVX_const(dsv)) {
4483 SvPV_free(dsv);
4484 SvLEN_set(dsv, 0);
4485 SvCUR_set(dsv, 0);
4486 }
4487 }
4488 (void)SvOK_off(dsv);
4489 SvRV_set(dsv, SvREFCNT_inc(SvRV(ssv)));
4490 SvFLAGS(dsv) |= sflags & SVf_ROK;
4491 assert(!(sflags & SVp_NOK));
4492 assert(!(sflags & SVp_IOK));
4493 assert(!(sflags & SVf_NOK));
4494 assert(!(sflags & SVf_IOK));
4495 }
4496 else if (isGV_with_GP(dsv)) {
4497 if (!(sflags & SVf_OK)) {
4498 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4499 "Undefined value assigned to typeglob");
4500 }
4501 else {
4502 GV *gv = gv_fetchsv_nomg(ssv, GV_ADD, SVt_PVGV);
4503 if (dsv != (const SV *)gv) {
4504 const char * const name = GvNAME((const GV *)dsv);
4505 const STRLEN len = GvNAMELEN(dsv);
4506 HV *old_stash = NULL;
4507 bool reset_isa = FALSE;
4508 if ((len > 1 && name[len-2] == ':' && name[len-1] == ':')
4509 || (len == 1 && name[0] == ':')) {
4510 /* Set aside the old stash, so we can reset isa caches
4511 on its subclasses. */
4512 if((old_stash = GvHV(dsv))) {
4513 /* Make sure we do not lose it early. */
4514 SvREFCNT_inc_simple_void_NN(
4515 sv_2mortal((SV *)old_stash)
4516 );
4517 }
4518 reset_isa = TRUE;
4519 }
4520
4521 if (GvGP(dsv)) {
4522 SvREFCNT_inc_simple_void_NN(sv_2mortal(dsv));
4523 gp_free(MUTABLE_GV(dsv));
4524 }
4525 GvGP_set(dsv, gp_ref(GvGP(gv)));
4526
4527 if (reset_isa) {
4528 HV * const stash = GvHV(dsv);
4529 if(
4530 old_stash ? HvHasENAME(old_stash) : cBOOL(stash)
4531 )
4532 mro_package_moved(
4533 stash, old_stash,
4534 (GV *)dsv, 0
4535 );
4536 }
4537 }
4538 }
4539 }
4540 else if ((dtype == SVt_REGEXP || dtype == SVt_PVLV)
4541 && (stype == SVt_REGEXP || isREGEXP(ssv))) {
4542 reg_temp_copy((REGEXP*)dsv, (REGEXP*)ssv);
4543 }
4544 else if (sflags & SVp_POK) {
4545 const STRLEN cur = SvCUR(ssv);
4546 const STRLEN len = SvLEN(ssv);
4547
4548 /*
4549 * We have three basic ways to copy the string:
4550 *
4551 * 1. Swipe
4552 * 2. Copy-on-write
4553 * 3. Actual copy
4554 *
4555 * Which we choose is based on various factors. The following
4556 * things are listed in order of speed, fastest to slowest:
4557 * - Swipe
4558 * - Copying a short string
4559 * - Copy-on-write bookkeeping
4560 * - malloc
4561 * - Copying a long string
4562 *
4563 * We swipe the string (steal the string buffer) if the SV on the
4564 * rhs is about to be freed anyway (TEMP and refcnt==1). This is a
4565 * big win on long strings. It should be a win on short strings if
4566 * SvPVX_const(dsv) has to be allocated. If not, it should not
4567 * slow things down, as SvPVX_const(ssv) would have been freed
4568 * soon anyway.
4569 *
4570 * We also steal the buffer from a PADTMP (operator target) if it
4571 * is ‘long enough’. For short strings, a swipe does not help
4572 * here, as it causes more malloc calls the next time the target
4573 * is used. Benchmarks show that even if SvPVX_const(dsv) has to
4574 * be allocated it is still not worth swiping PADTMPs for short
4575 * strings, as the savings here are small.
4576 *
4577 * If swiping is not an option, then we see whether it is worth using
4578 * copy-on-write. If the lhs already has a buffer big enough and the
4579 * string is short, we skip it and fall back to method 3, since memcpy
4580 * is faster for short strings than the later bookkeeping overhead that
4581 * copy-on-write entails.
4582
4583 * If the rhs is not a copy-on-write string yet, then we also
4584 * consider whether the buffer is too large relative to the string
4585 * it holds. Some operations such as readline allocate a large
4586 * buffer in the expectation of reusing it. But turning such into
4587 * a COW buffer is counter-productive because it increases memory
4588 * usage by making readline allocate a new large buffer the sec-
4589 * ond time round. So, if the buffer is too large, again, we use
4590 * method 3 (copy).
4591 *
4592 * Finally, if there is no buffer on the left, or the buffer is too
4593 * small, then we use copy-on-write and make both SVs share the
4594 * string buffer.
4595 *
4596 */
4597
4598 /* Whichever path we take through the next code, we want this true,
4599 and doing it now facilitates the COW check. */
4600 (void)SvPOK_only(dsv);
4601
4602 if (
4603 ( /* Either ... */
4604 /* slated for free anyway (and not COW)? */
4605 (sflags & (SVs_TEMP|SVf_IsCOW)) == SVs_TEMP
4606 /* or a swipable TARG */
4607 || ((sflags &
4608 (SVs_PADTMP|SVf_READONLY|SVf_PROTECT|SVf_IsCOW))
4609 == SVs_PADTMP
4610 /* whose buffer is worth stealing */
4611 && CHECK_COWBUF_THRESHOLD(cur,len)
4612 )
4613 ) &&
4614 !(sflags & SVf_OOK) && /* and not involved in OOK hack? */
4615 (!(flags & SV_NOSTEAL)) &&
4616 /* and we're allowed to steal temps */
4617 SvREFCNT(ssv) == 1 && /* and no other references to it? */
4618 len) /* and really is a string */
4619 { /* Passes the swipe test. */
4620 if (SvPVX_const(dsv)) /* we know that dtype >= SVt_PV */
4621 SvPV_free(dsv);
4622 SvPV_set(dsv, SvPVX_mutable(ssv));
4623 SvLEN_set(dsv, SvLEN(ssv));
4624 SvCUR_set(dsv, SvCUR(ssv));
4625
4626 SvTEMP_off(dsv);
4627 (void)SvOK_off(ssv); /* NOTE: nukes most SvFLAGS on ssv */
4628 SvPV_set(ssv, NULL);
4629 SvLEN_set(ssv, 0);
4630 SvCUR_set(ssv, 0);
4631 SvTEMP_off(ssv);
4632 }
4633 /* We must check for SvIsCOW_static() even without
4634 * SV_COW_SHARED_HASH_KEYS being set or else we'll break SvIsBOOL()
4635 */
4636 else if (SvIsCOW_static(ssv)) {
4637 if (SvPVX_const(dsv)) { /* we know that dtype >= SVt_PV */
4638 SvPV_free(dsv);
4639 }
4640 SvPV_set(dsv, SvPVX(ssv));
4641 SvLEN_set(dsv, 0);
4642 SvCUR_set(dsv, cur);
4643 SvFLAGS(dsv) |= (SVf_IsCOW|SVppv_STATIC);
4644 }
4645 else if (flags & SV_COW_SHARED_HASH_KEYS
4646 &&
4647 #ifdef PERL_COPY_ON_WRITE
4648 (sflags & SVf_IsCOW
4649 ? (!len ||
4650 ( (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dsv) < cur+1)
4651 /* If this is a regular (non-hek) COW, only so
4652 many COW "copies" are possible. */
4653 && CowREFCNT(ssv) != SV_COW_REFCNT_MAX ))
4654 : ( (sflags & CAN_COW_MASK) == CAN_COW_FLAGS
4655 && !(SvFLAGS(dsv) & SVf_BREAK)
4656 && CHECK_COW_THRESHOLD(cur,len) && cur+1 < len
4657 && (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dsv) < cur+1)
4658 ))
4659 #else
4660 sflags & SVf_IsCOW
4661 && !(SvFLAGS(dsv) & SVf_BREAK)
4662 #endif
4663 ) {
4664 /* Either it's a shared hash key, or it's suitable for
4665 copy-on-write. */
4666 #ifdef DEBUGGING
4667 if (DEBUG_C_TEST) {
4668 PerlIO_printf(Perl_debug_log, "Copy on write: ssv --> dsv\n");
4669 sv_dump(ssv);
4670 sv_dump(dsv);
4671 }
4672 #endif
4673 #ifdef PERL_ANY_COW
4674 if (!(sflags & SVf_IsCOW)) {
4675 SvIsCOW_on(ssv);
4676 CowREFCNT(ssv) = 0;
4677 }
4678 #endif
4679 if (SvPVX_const(dsv)) { /* we know that dtype >= SVt_PV */
4680 SvPV_free(dsv);
4681 }
4682
4683 #ifdef PERL_ANY_COW
4684 if (len) {
4685 if (sflags & SVf_IsCOW) {
4686 sv_buf_to_rw(ssv);
4687 }
4688 CowREFCNT(ssv)++;
4689 SvPV_set(dsv, SvPVX_mutable(ssv));
4690 sv_buf_to_ro(ssv);
4691 } else
4692 #endif
4693 {
4694 /* SvIsCOW_shared_hash */
4695 DEBUG_C(PerlIO_printf(Perl_debug_log,
4696 "Copy on write: Sharing hash\n"));
4697
4698 assert (SvTYPE(dsv) >= SVt_PV);
4699 SvPV_set(dsv,
4700 HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv)))));
4701 }
4702 SvLEN_set(dsv, len);
4703 SvCUR_set(dsv, cur);
4704 SvIsCOW_on(dsv);
4705 } else {
4706 /* Failed the swipe test, and we cannot do copy-on-write either.
4707 Have to copy the string. */
4708 SvGROW(dsv, cur + 1); /* inlined from sv_setpvn */
4709 Move(SvPVX_const(ssv),SvPVX(dsv),cur,char);
4710 SvCUR_set(dsv, cur);
4711 *SvEND(dsv) = '\0';
4712 }
4713 if (sflags & SVp_NOK) {
4714 SvNV_set(dsv, SvNVX(ssv));
4715 if ((sflags & SVf_NOK) && !(sflags & SVf_POK)) {
4716 /* Source was SVf_NOK|SVp_NOK|SVp_POK but not SVf_POK, meaning
4717 a value set as floating point and later stringified, where
4718 the value happens to be one of the few that we know aren't
4719 affected by the numeric locale, hence we can cache the
4720 stringification. Currently that's +Inf, -Inf and NaN, but
4721 conceivably we might extend this to -9 .. +9 (excluding -0).
4722 So mark destination the same: */
4723 SvFLAGS(dsv) &= ~SVf_POK;
4724 }
4725 }
4726 if (sflags & SVp_IOK) {
4727 SvIV_set(dsv, SvIVX(ssv));
4728 if (sflags & SVf_IVisUV)
4729 SvIsUV_on(dsv);
4730 if ((sflags & SVf_IOK) && !(sflags & SVf_POK)) {
4731 /* Source was SVf_IOK|SVp_IOK|SVp_POK but not SVf_POK, meaning
4732 a value set as an integer and later stringified. So mark
4733 destination the same: */
4734 SvFLAGS(dsv) &= ~SVf_POK;
4735 }
4736 }
4737 SvFLAGS(dsv) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8);
4738 {
4739 const MAGIC * const smg = SvVSTRING_mg(ssv);
4740 if (smg) {
4741 sv_magic(dsv, NULL, PERL_MAGIC_vstring,
4742 smg->mg_ptr, smg->mg_len);
4743 SvRMAGICAL_on(dsv);
4744 }
4745 }
4746 }
4747 else if (sflags & (SVp_IOK|SVp_NOK)) {
4748 (void)SvOK_off(dsv);
4749 SvFLAGS(dsv) |= sflags & (SVf_IOK|SVp_IOK|SVf_IVisUV|SVf_NOK|SVp_NOK);
4750 if (sflags & SVp_IOK) {
4751 /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */
4752 SvIV_set(dsv, SvIVX(ssv));
4753 }
4754 if (sflags & SVp_NOK) {
4755 SvNV_set(dsv, SvNVX(ssv));
4756 }
4757 }
4758 else {
4759 if (isGV_with_GP(ssv)) {
4760 gv_efullname3(dsv, MUTABLE_GV(ssv), "*");
4761 }
4762 else
4763 (void)SvOK_off(dsv);
4764 }
4765 if (SvTAINTED(ssv))
4766 SvTAINT(dsv);
4767 }
4768
4769
4770 /*
4771 =for apidoc sv_set_undef
4772
4773 Equivalent to C<sv_setsv(sv, &PL_sv_undef)>, but more efficient.
4774 Doesn't handle set magic.
4775
4776 The perl equivalent is C<$sv = undef;>. Note that it doesn't free any string
4777 buffer, unlike C<undef $sv>.
4778
4779 Introduced in perl 5.25.12.
4780
4781 =cut
4782 */
4783
4784 void
4785 Perl_sv_set_undef(pTHX_ SV *sv)
4786 {
4787 U32 type = SvTYPE(sv);
4788
4789 PERL_ARGS_ASSERT_SV_SET_UNDEF;
4790
4791 /* shortcut, NULL, IV, RV */
4792
4793 if (type <= SVt_IV) {
4794 assert(!SvGMAGICAL(sv));
4795 if (SvREADONLY(sv)) {
4796 /* does undeffing PL_sv_undef count as modifying a read-only
4797 * variable? Some XS code does this */
4798 if (sv == &PL_sv_undef)
4799 return;
4800 Perl_croak_no_modify();
4801 }
4802
4803 if (SvROK(sv)) {
4804 if (SvWEAKREF(sv))
4805 sv_unref_flags(sv, 0);
4806 else {
4807 SV *rv = SvRV(sv);
4808 SvFLAGS(sv) = type; /* quickly turn off all flags */
4809 SvREFCNT_dec_NN(rv);
4810 return;
4811 }
4812 }
4813 SvFLAGS(sv) = type; /* quickly turn off all flags */
4814 return;
4815 }
4816
4817 if (SvIS_FREED(sv))
4818 Perl_croak(aTHX_ "panic: attempt to undefine a freed scalar %p",
4819 (void *)sv);
4820
4821 SV_CHECK_THINKFIRST_COW_DROP(sv);
4822
4823 if (isGV_with_GP(sv))
4824 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4825 "Undefined value assigned to typeglob");
4826 else
4827 SvOK_off(sv);
4828 }
4829
4830 /*
4831 =for apidoc sv_set_true
4832
4833 Equivalent to C<sv_setsv(sv, &PL_sv_yes)>, but may be made more
4834 efficient in the future. Doesn't handle set magic.
4835
4836 The perl equivalent is C<$sv = !0;>.
4837
4838 Introduced in perl 5.35.11.
4839
4840 =cut
4841 */
4842
4843 void
4844 Perl_sv_set_true(pTHX_ SV *sv)
4845 {
4846 PERL_ARGS_ASSERT_SV_SET_TRUE;
4847 sv_setsv(sv, &PL_sv_yes);
4848 }
4849
4850 /*
4851 =for apidoc sv_set_false
4852
4853 Equivalent to C<sv_setsv(sv, &PL_sv_no)>, but may be made more
4854 efficient in the future. Doesn't handle set magic.
4855
4856 The perl equivalent is C<$sv = !1;>.
4857
4858 Introduced in perl 5.35.11.
4859
4860 =cut
4861 */
4862
4863 void
4864 Perl_sv_set_false(pTHX_ SV *sv)
4865 {
4866 PERL_ARGS_ASSERT_SV_SET_FALSE;
4867 sv_setsv(sv, &PL_sv_no);
4868 }
4869
4870 /*
4871 =for apidoc sv_set_bool
4872
4873 Equivalent to C<sv_setsv(sv, bool_val ? &Pl_sv_yes : &PL_sv_no)>, but
4874 may be made more efficient in the future. Doesn't handle set magic.
4875
4876 The perl equivalent is C<$sv = !!$expr;>.
4877
4878 Introduced in perl 5.35.11.
4879
4880 =cut
4881 */
4882
4883 void
4884 Perl_sv_set_bool(pTHX_ SV *sv, const bool bool_val)
4885 {
4886 PERL_ARGS_ASSERT_SV_SET_BOOL;
4887 sv_setsv(sv, bool_val ? &PL_sv_yes : &PL_sv_no);
4888 }
4889
4890
4891 void
4892 Perl_sv_setsv_mg(pTHX_ SV *const dsv, SV *const ssv)
4893 {
4894 PERL_ARGS_ASSERT_SV_SETSV_MG;
4895
4896 sv_setsv(dsv,ssv);
4897 SvSETMAGIC(dsv);
4898 }
4899
4900 #ifdef PERL_ANY_COW
4901 # define SVt_COW SVt_PV
4902 SV *
4903 Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv)
4904 {
4905 STRLEN cur = SvCUR(ssv);
4906 STRLEN len = SvLEN(ssv);
4907 char *new_pv;
4908 U32 new_flags = (SVt_COW|SVf_POK|SVp_POK|SVf_IsCOW);
4909 #if defined(PERL_DEBUG_READONLY_COW) && defined(PERL_COPY_ON_WRITE)
4910 const bool already = cBOOL(SvIsCOW(ssv));
4911 #endif
4912
4913 PERL_ARGS_ASSERT_SV_SETSV_COW;
4914 #ifdef DEBUGGING
4915 if (DEBUG_C_TEST) {
4916 PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n",
4917 (void*)ssv, (void*)dsv);
4918 sv_dump(ssv);
4919 if (dsv)
4920 sv_dump(dsv);
4921 }
4922 #endif
4923 if (dsv) {
4924 if (SvTHINKFIRST(dsv))
4925 sv_force_normal_flags(dsv, SV_COW_DROP_PV);
4926 else if (SvPVX_const(dsv))
4927 Safefree(SvPVX_mutable(dsv));
4928 SvUPGRADE(dsv, SVt_COW);
4929 }
4930 else
4931 dsv = newSV_type(SVt_COW);
4932
4933 assert (SvPOK(ssv));
4934 assert (SvPOKp(ssv));
4935
4936 if (SvIsCOW(ssv)) {
4937 if (SvIsCOW_shared_hash(ssv)) {
4938 /* source is a COW shared hash key. */
4939 DEBUG_C(PerlIO_printf(Perl_debug_log,
4940 "Fast copy on write: Sharing hash\n"));
4941 new_pv = HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv))));
4942 goto common_exit;
4943 }
4944 else if (SvIsCOW_static(ssv)) {
4945 /* source is static constant; preserve this */
4946 new_pv = SvPVX(ssv);
4947 new_flags |= SVppv_STATIC;
4948 goto common_exit;
4949 }
4950 assert(SvCUR(ssv)+1 < SvLEN(ssv));
4951 assert(CowREFCNT(ssv) < SV_COW_REFCNT_MAX);
4952 } else {
4953 assert ((SvFLAGS(ssv) & CAN_COW_MASK) == CAN_COW_FLAGS);
4954 SvUPGRADE(ssv, SVt_COW);
4955 SvIsCOW_on(ssv);
4956 DEBUG_C(PerlIO_printf(Perl_debug_log,
4957 "Fast copy on write: Converting ssv to COW\n"));
4958 CowREFCNT(ssv) = 0;
4959 }
4960 # ifdef PERL_DEBUG_READONLY_COW
4961 if (already) sv_buf_to_rw(ssv);
4962 # endif
4963 CowREFCNT(ssv)++;
4964 new_pv = SvPVX_mutable(ssv);
4965 sv_buf_to_ro(ssv);
4966
4967 common_exit:
4968 SvPV_set(dsv, new_pv);
4969 SvFLAGS(dsv) = new_flags;
4970 if (SvUTF8(ssv))
4971 SvUTF8_on(dsv);
4972 SvLEN_set(dsv, len);
4973 SvCUR_set(dsv, cur);
4974 #ifdef DEBUGGING
4975 if (DEBUG_C_TEST)
4976 sv_dump(dsv);
4977 #endif
4978 return dsv;
4979 }
4980 #endif
4981
4982 /*
4983 =for apidoc sv_setpv_bufsize
4984
4985 Sets the SV to be a string of C<cur> bytes length, with at least
4986 C<len> bytes available. Ensures that there is a null byte at C<SvEND>.
4987
4988 Returns a char * pointer to the SvPV buffer.
4989
4990 The caller must set the first C<cur> bytes of C<sv> before the first use of its
4991 contents. This means that if C<cur> is zero, the SV is immediately fully
4992 formed and ready to use, just like any other SV containing an empty string.
4993
4994 =cut
4995 */
4996
4997 char *
4998 Perl_sv_setpv_bufsize(pTHX_ SV *const sv, const STRLEN cur, const STRLEN len)
4999 {
5000 char *pv;
5001
5002 PERL_ARGS_ASSERT_SV_SETPV_BUFSIZE;
5003
5004 SV_CHECK_THINKFIRST_COW_DROP(sv);
5005 SvUPGRADE(sv, SVt_PV);
5006 pv = SvGROW(sv, len + 1);
5007 SvCUR_set(sv, cur);
5008 *(SvEND(sv))= '\0';
5009 (void)SvPOK_only_UTF8(sv); /* validate pointer */
5010
5011 SvTAINT(sv);
5012 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv);
5013 return pv;
5014 }
5015
5016 /*
5017 =for apidoc sv_setpv
5018 =for apidoc_item sv_setpv_mg
5019 =for apidoc_item sv_setpvn
5020 =for apidoc_item sv_setpvn_fresh
5021 =for apidoc_item sv_setpvn_mg
5022 =for apidoc_item |void|sv_setpvs|SV* sv|"literal string"
5023 =for apidoc_item |void|sv_setpvs_mg|SV* sv|"literal string"
5024
5025 These copy a string into the SV C<sv>, making sure it is C<L</SvPOK_only>>.
5026
5027 In the C<pvs> forms, the string must be a C literal string, enclosed in double
5028 quotes.
5029
5030 In the C<pvn> forms, the first byte of the string is pointed to by C<ptr>, and
5031 C<len> indicates the number of bytes to be copied, potentially including
5032 embedded C<NUL> characters.
5033
5034 In the plain C<pv> forms, C<ptr> points to a NUL-terminated C string. That is,
5035 it points to the first byte of the string, and the copy proceeds up through the
5036 first encountered C<NUL> byte.
5037
5038 In the forms that take a C<ptr> argument, if it is NULL, the SV will become
5039 undefined.
5040
5041 B<The UTF-8 flag is not changed by these functions.>
5042
5043 A terminating NUL byte is guaranteed in the result.
5044
5045 The C<_mg> forms handle 'set' magic; the other forms skip all magic.
5046
5047 C<sv_setpvn_fresh> is a cut-down alternative to C<sv_setpvn>, intended ONLY
5048 to be used with a fresh sv that has been upgraded to a SVt_PV, SVt_PVIV,
5049 SVt_PVNV, or SVt_PVMG.
5050
5051 =cut
5052 */
5053
5054 void
5055 Perl_sv_setpvn(pTHX_ SV *const sv, const char *const ptr, const STRLEN len)
5056 {
5057 char *dptr;
5058
5059 PERL_ARGS_ASSERT_SV_SETPVN;
5060
5061 SV_CHECK_THINKFIRST_COW_DROP(sv);
5062 if (isGV_with_GP(sv))
5063 Perl_croak_no_modify();
5064 if (!ptr) {
5065 (void)SvOK_off(sv);
5066 return;
5067 }
5068 else {
5069 /* len is STRLEN which is unsigned, need to copy to signed */
5070 const IV iv = len;
5071 if (iv < 0)
5072 Perl_croak(aTHX_ "panic: sv_setpvn called with negative strlen %"
5073 IVdf, iv);
5074 }
5075 SvUPGRADE(sv, SVt_PV);
5076
5077 dptr = SvGROW(sv, len + 1);
5078 Move(ptr,dptr,len,char);
5079 dptr[len] = '\0';
5080 SvCUR_set(sv, len);
5081 (void)SvPOK_only_UTF8(sv); /* validate pointer */
5082 SvTAINT(sv);
5083 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv);
5084 }
5085
5086 void
5087 Perl_sv_setpvn_mg(pTHX_ SV *const sv, const char *const ptr, const STRLEN len)
5088 {
5089 PERL_ARGS_ASSERT_SV_SETPVN_MG;
5090
5091 sv_setpvn(sv,ptr,len);
5092 SvSETMAGIC(sv);
5093 }
5094
5095 void
5096 Perl_sv_setpvn_fresh(pTHX_ SV *const sv, const char *const ptr, const STRLEN len)
5097 {
5098 char *dptr;
5099
5100 PERL_ARGS_ASSERT_SV_SETPVN_FRESH;
5101 assert(SvTYPE(sv) >= SVt_PV && SvTYPE(sv) <= SVt_PVMG);
5102 assert(!SvTHINKFIRST(sv));
5103 assert(!isGV_with_GP(sv));
5104
5105 if (ptr) {
5106 const IV iv = len;
5107 /* len is STRLEN which is unsigned, need to copy to signed */
5108 if (iv < 0)
5109 Perl_croak(aTHX_ "panic: sv_setpvn_fresh called with negative strlen %"
5110 IVdf, iv);
5111
5112 dptr = sv_grow_fresh(sv, len + 1);
5113 Move(ptr,dptr,len,char);
5114 dptr[len] = '\0';
5115 SvCUR_set(sv, len);
5116 SvPOK_on(sv);
5117 SvTAINT(sv);
5118 }
5119 }
5120
5121 void
5122 Perl_sv_setpv(pTHX_ SV *const sv, const char *const ptr)
5123 {
5124 STRLEN len;
5125
5126 PERL_ARGS_ASSERT_SV_SETPV;
5127
5128 SV_CHECK_THINKFIRST_COW_DROP(sv);
5129 if (!ptr) {
5130 (void)SvOK_off(sv);
5131 return;
5132 }
5133 len = strlen(ptr);
5134 SvUPGRADE(sv, SVt_PV);
5135
5136 SvGROW(sv, len + 1);
5137 Move(ptr,SvPVX(sv),len+1,char);
5138 SvCUR_set(sv, len);
5139 (void)SvPOK_only_UTF8(sv); /* validate pointer */
5140 SvTAINT(sv);
5141 if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv);
5142 }
5143
5144 void
5145 Perl_sv_setpv_mg(pTHX_ SV *const sv, const char *const ptr)
5146 {
5147 PERL_ARGS_ASSERT_SV_SETPV_MG;
5148
5149 sv_setpv(sv,ptr);
5150 SvSETMAGIC(sv);
5151 }
5152
5153 void
5154 Perl_sv_sethek(pTHX_ SV *const sv, const HEK *const hek)
5155 {
5156 PERL_ARGS_ASSERT_SV_SETHEK;
5157
5158 if (!hek) {
5159 return;
5160 }
5161
5162 if (HEK_LEN(hek) == HEf_SVKEY) {
5163 sv_setsv(sv, *(SV**)HEK_KEY(hek));
5164 return;
5165 } else {
5166 const int flags = HEK_FLAGS(hek);
5167 if (flags & HVhek_WASUTF8) {
5168 STRLEN utf8_len = HEK_LEN(hek);
5169 char *as_utf8 = (char *)bytes_to_utf8((U8*)HEK_KEY(hek), &utf8_len);
5170 sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL);
5171 SvUTF8_on(sv);
5172 return;
5173 } else if (flags & HVhek_NOTSHARED) {
5174 sv_setpvn(sv, HEK_KEY(hek), HEK_LEN(hek));
5175 if (HEK_UTF8(hek))
5176 SvUTF8_on(sv);
5177 else SvUTF8_off(sv);
5178 return;
5179 }
5180 {
5181 SV_CHECK_THINKFIRST_COW_DROP(sv);
5182 SvUPGRADE(sv, SVt_PV);
5183 SvPV_free(sv);
5184 SvPV_set(sv,(char *)HEK_KEY(share_hek_hek(hek)));
5185 SvCUR_set(sv, HEK_LEN(hek));
5186 SvLEN_set(sv, 0);
5187 SvIsCOW_on(sv);
5188 SvPOK_on(sv);
5189 if (HEK_UTF8(hek))
5190 SvUTF8_on(sv);
5191 else SvUTF8_off(sv);
5192 return;
5193 }
5194 }
5195 }
5196
5197
5198 /*
5199 =for apidoc sv_usepvn
5200 =for apidoc_item sv_usepvn_flags
5201 =for apidoc_item sv_usepvn_mg
5202
5203 These tell an SV to use C<ptr> for its string value. Normally SVs have
5204 their string stored inside the SV, but these tell the SV to use an
5205 external string instead.
5206
5207 C<ptr> should point to memory that was allocated
5208 by L</C<Newx>>. It must be
5209 the start of a C<Newx>-ed block of memory, and not a pointer to the
5210 middle of it (beware of L<C<OOK>|perlguts/Offsets> and copy-on-write),
5211 and not be from a non-C<Newx> memory allocator like C<malloc>. The
5212 string length, C<len>, must be supplied. By default this function
5213 will L</C<Renew>> (i.e. realloc, move) the memory pointed to by C<ptr>,
5214 so that the pointer should not be freed or used by the programmer after giving
5215 it to C<sv_usepvn>, and neither should any pointers from "behind" that pointer
5216 (I<e.g.>, S<C<ptr> + 1>) be used.
5217
5218 In the C<sv_usepvn_flags> form, if S<C<flags & SV_SMAGIC>> is true,
5219 C<SvSETMAGIC> is called before returning.
5220 And if S<C<flags & SV_HAS_TRAILING_NUL>> is true, then C<ptr[len]> must be
5221 C<NUL>, and the realloc will be skipped (I<i.e.>, the buffer is actually at
5222 least 1 byte longer than C<len>, and already meets the requirements for storing
5223 in C<SvPVX>).
5224
5225 C<sv_usepvn> is merely C<sv_usepvn_flags> with C<flags> set to 0, so 'set'
5226 magic is skipped.
5227
5228 C<sv_usepvn_mg> is merely C<sv_usepvn_flags> with C<flags> set to C<SV_SMAGIC>,
5229 so 'set' magic is performed.
5230
5231 =for apidoc Amnh||SV_SMAGIC
5232 =for apidoc Amnh||SV_HAS_TRAILING_NUL
5233
5234 =cut
5235 */
5236
5237 void
5238 Perl_sv_usepvn_flags(pTHX_ SV *const sv, char *ptr, const STRLEN len, const U32 flags)
5239 {
5240 STRLEN allocate;
5241
5242 PERL_ARGS_ASSERT_SV_USEPVN_FLAGS;
5243
5244 SV_CHECK_THINKFIRST_COW_DROP(sv);
5245 SvUPGRADE(sv, SVt_PV);
5246 if (!ptr) {
5247 (void)SvOK_off(sv);
5248 if (flags & SV_SMAGIC)
5249 SvSETMAGIC(sv);
5250 return;
5251 }
5252 if (SvPVX_const(sv))
5253 SvPV_free(sv);
5254
5255 #ifdef DEBUGGING
5256 if (flags & SV_HAS_TRAILING_NUL)
5257 assert(ptr[len] == '\0');
5258 #endif
5259
5260 allocate = (flags & SV_HAS_TRAILING_NUL)
5261 ? len + 1 :
5262 #ifdef Perl_safesysmalloc_size
5263 len + 1;
5264 #else
5265 PERL_STRLEN_ROUNDUP(len + 1);
5266 #endif
5267 if (flags & SV_HAS_TRAILING_NUL) {
5268 /* It's long enough - do nothing.
5269 Specifically Perl_newCONSTSUB is relying on this. */
5270 } else {
5271 #ifdef DEBUGGING
5272 /* Force a move to shake out bugs in callers. */
5273 char *new_ptr = (char*)safemalloc(allocate);
5274 Copy(ptr, new_ptr, len, char);
5275 PoisonFree(ptr,len,char);
5276 Safefree(ptr);
5277 ptr = new_ptr;
5278 #else
5279 ptr = (char*) saferealloc (ptr, allocate);
5280 #endif
5281 }
5282 #ifdef Perl_safesysmalloc_size
5283 SvLEN_set(sv, Perl_safesysmalloc_size(ptr));
5284 #else
5285 SvLEN_set(sv, allocate);
5286 #endif
5287 SvCUR_set(sv, len);
5288 SvPV_set(sv, ptr);
5289 if (!(flags & SV_HAS_TRAILING_NUL)) {
5290 ptr[len] = '\0';
5291 }
5292 (void)SvPOK_only_UTF8(sv); /* validate pointer */
5293 SvTAINT(sv);
5294 if (flags & SV_SMAGIC)
5295 SvSETMAGIC(sv);
5296 }
5297
5298
5299 static void
5300 S_sv_uncow(pTHX_ SV * const sv, const U32 flags)
5301 {
5302 assert(SvIsCOW(sv));
5303 {
5304 #ifdef PERL_ANY_COW
5305 const char * const pvx = SvPVX_const(sv);
5306 const STRLEN len = SvLEN(sv);
5307 const STRLEN cur = SvCUR(sv);
5308 const bool was_shared_hek = SvIsCOW_shared_hash(sv);
5309
5310 #ifdef DEBUGGING
5311 if (DEBUG_C_TEST) {
5312 PerlIO_printf(Perl_debug_log,
5313 "Copy on write: Force normal %ld\n",
5314 (long) flags);
5315 sv_dump(sv);
5316 }
5317 #endif
5318 SvIsCOW_off(sv);
5319 # ifdef PERL_COPY_ON_WRITE
5320 if (len) {
5321 /* Must do this first, since the CowREFCNT uses SvPVX and
5322 we need to write to CowREFCNT, or de-RO the whole buffer if we are
5323 the only owner left of the buffer. */
5324 sv_buf_to_rw(sv); /* NOOP if RO-ing not supported */
5325 {
5326 U8 cowrefcnt = CowREFCNT(sv);
5327 if(cowrefcnt != 0) {
5328 cowrefcnt--;
5329 CowREFCNT(sv) = cowrefcnt;
5330 sv_buf_to_ro(sv);
5331 goto copy_over;
5332 }
5333 }
5334 /* Else we are the only owner of the buffer. */
5335 }
5336 else
5337 # endif
5338 {
5339 /* This SV doesn't own the buffer, so need to Newx() a new one: */
5340 copy_over:
5341 SvPV_set(sv, NULL);
5342 SvCUR_set(sv, 0);
5343 SvLEN_set(sv, 0);
5344 if (flags & SV_COW_DROP_PV) {
5345 /* OK, so we don't need to copy our buffer. */
5346 SvPOK_off(sv);
5347 } else {
5348 SvGROW(sv, cur + 1);
5349 Move(pvx,SvPVX(sv),cur,char);
5350 SvCUR_set(sv, cur);
5351 *SvEND(sv) = '\0';
5352 }
5353 if (was_shared_hek) {
5354 unshare_hek(SvSHARED_HEK_FROM_PV(pvx));
5355 }
5356 #ifdef DEBUGGING
5357 if (DEBUG_C_TEST)
5358 sv_dump(sv);
5359 #endif
5360 }
5361 #else
5362 const char * const pvx = SvPVX_const(sv);
5363 const STRLEN len = SvCUR(sv);
5364 SvIsCOW_off(sv);
5365 SvPV_set(sv, NULL);
5366 SvLEN_set(sv, 0);
5367 if (flags & SV_COW_DROP_PV) {
5368 /* OK, so we don't need to copy our buffer. */
5369 SvPOK_off(sv);
5370 } else {
5371 SvGROW(sv, len + 1);
5372 Move(pvx,SvPVX(sv),len,char);
5373 *SvEND(sv) = '\0';
5374 }
5375 unshare_hek(SvSHARED_HEK_FROM_PV(pvx));
5376 #endif
5377 }
5378 }
5379
5380
5381 /*
5382 =for apidoc sv_force_normal_flags
5383
5384 Undo various types of fakery on an SV, where fakery means
5385 "more than" a string: if the PV is a shared string, make
5386 a private copy; if we're a ref, stop refing; if we're a glob, downgrade to
5387 an C<xpvmg>; if we're a copy-on-write scalar, this is the on-write time when
5388 we do the copy, and is also used locally; if this is a
5389 vstring, drop the vstring magic. If C<SV_COW_DROP_PV> is set
5390 then a copy-on-write scalar drops its PV buffer (if any) and becomes
5391 C<SvPOK_off> rather than making a copy. (Used where this
5392 scalar is about to be set to some other value.) In addition,
5393 the C<flags> parameter gets passed to C<sv_unref_flags()>
5394 when unreffing. C<sv_force_normal> calls this function
5395 with flags set to 0.
5396
5397 This function is expected to be used to signal to perl that this SV is
5398 about to be written to, and any extra book-keeping needs to be taken care
5399 of. Hence, it croaks on read-only values.
5400
5401 =for apidoc Amnh||SV_COW_DROP_PV
5402
5403 =cut
5404 */
5405
5406 void
5407 Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags)
5408 {
5409 PERL_ARGS_ASSERT_SV_FORCE_NORMAL_FLAGS;
5410
5411 if (SvREADONLY(sv))
5412 Perl_croak_no_modify();
5413 else if (SvIsCOW(sv) && LIKELY(SvTYPE(sv) != SVt_PVHV))
5414 S_sv_uncow(aTHX_ sv, flags);
5415 if (SvROK(sv))
5416 sv_unref_flags(sv, flags);
5417 else if (SvFAKE(sv) && isGV_with_GP(sv))
5418 sv_unglob(sv, flags);
5419 else if (SvFAKE(sv) && isREGEXP(sv)) {
5420 /* Need to downgrade the REGEXP to a simple(r) scalar. This is analogous
5421 to sv_unglob. We only need it here, so inline it. */
5422 const bool islv = SvTYPE(sv) == SVt_PVLV;
5423 const svtype new_type =
5424 islv ? SVt_NULL : SvMAGIC(sv) || SvSTASH(sv) ? SVt_PVMG : SVt_PV;
5425 SV *const temp = newSV_type(new_type);
5426 regexp *old_rx_body;
5427
5428 if (new_type == SVt_PVMG) {
5429 SvMAGIC_set(temp, SvMAGIC(sv));
5430 SvMAGIC_set(sv, NULL);
5431 SvSTASH_set(temp, SvSTASH(sv));
5432 SvSTASH_set(sv, NULL);
5433 }
5434 if (!islv)
5435 SvCUR_set(temp, SvCUR(sv));
5436 /* Remember that SvPVX is in the head, not the body. */
5437 assert(ReANY((REGEXP *)sv)->mother_re);
5438
5439 if (islv) {
5440 /* LV-as-regex has sv->sv_any pointing to an XPVLV body,
5441 * whose xpvlenu_rx field points to the regex body */
5442 XPV *xpv = (XPV*)(SvANY(sv));
5443 old_rx_body = xpv->xpv_len_u.xpvlenu_rx;
5444 xpv->xpv_len_u.xpvlenu_rx = NULL;
5445 }
5446 else
5447 old_rx_body = ReANY((REGEXP *)sv);
5448
5449 /* Their buffer is already owned by someone else. */
5450 if (flags & SV_COW_DROP_PV) {
5451 /* SvLEN is already 0. For SVt_REGEXP, we have a brand new
5452 zeroed body. For SVt_PVLV, we zeroed it above (len field
5453 a union with xpvlenu_rx) */
5454 assert(!SvLEN(islv ? sv : temp));
5455 sv->sv_u.svu_pv = 0;
5456 }
5457 else {
5458 sv->sv_u.svu_pv = savepvn(RX_WRAPPED((REGEXP *)sv), SvCUR(sv));
5459 SvLEN_set(islv ? sv : temp, SvCUR(sv)+1);
5460 SvPOK_on(sv);
5461 }
5462
5463 /* Now swap the rest of the bodies. */
5464
5465 SvFAKE_off(sv);
5466 if (!islv) {
5467 SvFLAGS(sv) &= ~SVTYPEMASK;
5468 SvFLAGS(sv) |= new_type;
5469 SvANY(sv) = SvANY(temp);
5470 }
5471
5472 SvFLAGS(temp) &= ~(SVTYPEMASK);
5473 SvFLAGS(temp) |= SVt_REGEXP|SVf_FAKE;
5474 SvANY(temp) = old_rx_body;
5475
5476 /* temp is now rebuilt as a correctly structured SVt_REGEXP, so this
5477 * will trigger a call to sv_clear() which will correctly free the
5478 * body. */
5479 SvREFCNT_dec_NN(temp);
5480 }
5481 else if (SvVOK(sv)) sv_unmagic(sv, PERL_MAGIC_vstring);
5482 }
5483
5484 /*
5485 =for apidoc sv_chop
5486
5487 Efficient removal of characters from the beginning of the string buffer.
5488 C<SvPOK(sv)>, or at least C<SvPOKp(sv)>, must be true and C<ptr> must be a
5489 pointer to somewhere inside the string buffer. C<ptr> becomes the first
5490 character of the adjusted string. Uses the C<OOK> hack. On return, only
5491 C<SvPOK(sv)> and C<SvPOKp(sv)> among the C<OK> flags will be true.
5492
5493 Beware: after this function returns, C<ptr> and SvPVX_const(sv) may no longer
5494 refer to the same chunk of data.
5495
5496 The unfortunate similarity of this function's name to that of Perl's C<chop>
5497 operator is strictly coincidental. This function works from the left;
5498 C<chop> works from the right.
5499
5500 =cut
5501 */
5502
5503 void
5504 Perl_sv_chop(pTHX_ SV *const sv, const char *const ptr)
5505 {
5506 STRLEN delta;
5507 STRLEN old_delta;
5508 U8 *p;
5509 #ifdef DEBUGGING
5510 const U8 *evacp;
5511 STRLEN evacn;
5512 #endif
5513 STRLEN max_delta;
5514
5515 PERL_ARGS_ASSERT_SV_CHOP;
5516
5517 if (!ptr || !SvPOKp(sv))
5518 return;
5519 delta = ptr - SvPVX_const(sv);
5520 if (!delta) {
5521 /* Nothing to do. */
5522 return;
5523 }
5524 max_delta = SvLEN(sv) ? SvLEN(sv) : SvCUR(sv);
5525 if (delta > max_delta)
5526 Perl_croak(aTHX_ "panic: sv_chop ptr=%p, start=%p, end=%p",
5527 ptr, SvPVX_const(sv), SvPVX_const(sv) + max_delta);
5528 /* SvPVX(sv) may move in SV_CHECK_THINKFIRST(sv), so don't use ptr any more */
5529 SV_CHECK_THINKFIRST(sv);
5530 SvPOK_only_UTF8(sv);
5531
5532 if (!SvOOK(sv)) {
5533 if (!SvLEN(sv)) { /* make copy of shared string */
5534 const char *pvx = SvPVX_const(sv);
5535 const STRLEN len = SvCUR(sv);
5536 SvGROW(sv, len + 1);
5537 Move(pvx,SvPVX(sv),len,char);
5538 *SvEND(sv) = '\0';
5539 }
5540 SvOOK_on(sv);
5541 old_delta = 0;
5542 } else {
5543 SvOOK_offset(sv, old_delta);
5544 }
5545 SvLEN_set(sv, SvLEN(sv) - delta);
5546 SvCUR_set(sv, SvCUR(sv) - delta);
5547 SvPV_set(sv, SvPVX(sv) + delta);
5548
5549 p = (U8 *)SvPVX_const(sv);
5550
5551 #ifdef DEBUGGING
5552 /* how many bytes were evacuated? we will fill them with sentinel
5553 bytes, except for the part holding the new offset of course. */
5554 evacn = delta;
5555 if (old_delta)
5556 evacn += (old_delta < 0x100 ? 1 : 1 + sizeof(STRLEN));
5557 assert(evacn);
5558 assert(evacn <= delta + old_delta);
5559 evacp = p - evacn;
5560 #endif
5561
5562 /* This sets 'delta' to the accumulated value of all deltas so far */
5563 delta += old_delta;
5564 assert(delta);
5565
5566 /* If 'delta' fits in a byte, store it just prior to the new beginning of
5567 * the string; otherwise store a 0 byte there and store 'delta' just prior
5568 * to that, using as many bytes as a STRLEN occupies. Thus it overwrites a
5569 * portion of the chopped part of the string */
5570 if (delta < 0x100) {
5571 *--p = (U8) delta;
5572 } else {
5573 *--p = 0;
5574 p -= sizeof(STRLEN);
5575 Copy((U8*)&delta, p, sizeof(STRLEN), U8);
5576 }
5577
5578 #ifdef DEBUGGING
5579 /* Fill the preceding buffer with sentinals to verify that no-one is
5580 using it. */
5581 while (p > evacp) {
5582 --p;
5583 *p = (U8)PTR2UV(p);
5584 }
5585 #endif
5586 }
5587
5588 /*
5589 =for apidoc sv_catpvn
5590 =for apidoc_item sv_catpvn_flags
5591 =for apidoc_item sv_catpvn_mg
5592 =for apidoc_item sv_catpvn_nomg
5593
5594 These concatenate the C<len> bytes of the string beginning at C<ptr> onto the
5595 end of the string which is in C<dsv>. The caller must make sure C<ptr>
5596 contains at least C<len> bytes.
5597
5598 For all but C<sv_catpvn_flags>, the string appended is assumed to be valid
5599 UTF-8 if the SV has the UTF-8 status set, and a string of bytes otherwise.
5600
5601 They differ in that:
5602
5603 C<sv_catpvn_mg> performs both 'get' and 'set' magic on C<dsv>.
5604
5605 C<sv_catpvn> performs only 'get' magic.
5606
5607 C<sv_catpvn_nomg> skips all magic.
5608
5609 C<sv_catpvn_flags> has an extra C<flags> parameter which allows you to specify
5610 any combination of magic handling (using C<SV_GMAGIC> and/or C<SV_SMAGIC>) and
5611 to also override the UTF-8 handling. By supplying the C<SV_CATBYTES> flag, the
5612 appended string is interpreted as plain bytes; by supplying instead the
5613 C<SV_CATUTF8> flag, it will be interpreted as UTF-8, and the C<dsv> will be
5614 upgraded to UTF-8 if necessary.
5615
5616 C<sv_catpvn>, C<sv_catpvn_mg>, and C<sv_catpvn_nomg> are implemented
5617 in terms of C<sv_catpvn_flags>.
5618
5619 =for apidoc Amnh||SV_CATUTF8
5620 =for apidoc Amnh||SV_CATBYTES
5621
5622 =cut
5623 */
5624
5625 void
5626 Perl_sv_catpvn_flags(pTHX_ SV *const dsv, const char *sstr, const STRLEN slen, const I32 flags)
5627 {
5628 STRLEN dlen;
5629 const char * const dstr = SvPV_force_flags(dsv, dlen, flags);
5630
5631 PERL_ARGS_ASSERT_SV_CATPVN_FLAGS;
5632 assert((flags & (SV_CATBYTES|SV_CATUTF8)) != (SV_CATBYTES|SV_CATUTF8));
5633
5634 if (!(flags & SV_CATBYTES) || !SvUTF8(dsv)) {
5635 if (flags & SV_CATUTF8 && !SvUTF8(dsv)) {
5636 sv_utf8_upgrade_flags_grow(dsv, 0, slen + 1);
5637 dlen = SvCUR(dsv);
5638 }
5639 else SvGROW(dsv, dlen + slen + 3);
5640 if (sstr == dstr)
5641 sstr = SvPVX_const(dsv);
5642 Move(sstr, SvPVX(dsv) + dlen, slen, char);
5643 SvCUR_set(dsv, SvCUR(dsv) + slen);
5644 }
5645 else {
5646 /* We inline bytes_to_utf8, to avoid an extra malloc. */
5647 const char * const send = sstr + slen;
5648 U8 *d;
5649
5650 /* Something this code does not account for, which I think is
5651 impossible; it would require the same pv to be treated as
5652 bytes *and* utf8, which would indicate a bug elsewhere. */
5653 assert(sstr != dstr);
5654
5655 SvGROW(dsv, dlen + slen * 2 + 3);
5656 d = (U8 *)SvPVX(dsv) + dlen;
5657
5658 while (sstr < send) {
5659 append_utf8_from_native_byte(*sstr, &d);
5660 sstr++;
5661 }
5662 SvCUR_set(dsv, d-(const U8 *)SvPVX(dsv));
5663 }
5664 *SvEND(dsv) = '\0';
5665 (void)SvPOK_only_UTF8(dsv); /* validate pointer */
5666 SvTAINT(dsv);
5667 if (flags & SV_SMAGIC)
5668 SvSETMAGIC(dsv);
5669 }
5670
5671 /*
5672 =for apidoc sv_catsv
5673 =for apidoc_item sv_catsv_flags
5674 =for apidoc_item sv_catsv_mg
5675 =for apidoc_item sv_catsv_nomg
5676
5677 These concatenate the string from SV C<sstr> onto the end of the string in SV
5678 C<dsv>. If C<sstr> is null, these are no-ops; otherwise only C<dsv> is
5679 modified.
5680
5681 They differ only in what magic they perform:
5682
5683 C<sv_catsv_mg> performs 'get' magic on both SVs before the copy, and 'set' magic
5684 on C<dsv> afterwards.
5685
5686 C<sv_catsv> performs just 'get' magic, on both SVs.
5687
5688 C<sv_catsv_nomg> skips all magic.
5689
5690 C<sv_catsv_flags> has an extra C<flags> parameter which allows you to use
5691 C<SV_GMAGIC> and/or C<SV_SMAGIC> to specify any combination of magic handling
5692 (although either both or neither SV will have 'get' magic applied to it.)
5693
5694 C<sv_catsv>, C<sv_catsv_mg>, and C<sv_catsv_nomg> are implemented
5695 in terms of C<sv_catsv_flags>.
5696
5697 =cut */
5698
5699 void
5700 Perl_sv_catsv_flags(pTHX_ SV *const dsv, SV *const sstr, const I32 flags)
5701 {
5702 PERL_ARGS_ASSERT_SV_CATSV_FLAGS;
5703
5704 if (sstr) {
5705 STRLEN slen;
5706 const char *spv = SvPV_flags_const(sstr, slen, flags);
5707 if (flags & SV_GMAGIC)
5708 SvGETMAGIC(dsv);
5709 sv_catpvn_flags(dsv, spv, slen,
5710 DO_UTF8(sstr) ? SV_CATUTF8 : SV_CATBYTES);
5711 if (flags & SV_SMAGIC)
5712 SvSETMAGIC(dsv);
5713 }
5714 }
5715
5716 /*
5717 =for apidoc sv_catpv
5718 =for apidoc_item sv_catpv_flags
5719 =for apidoc_item sv_catpv_mg
5720 =for apidoc_item sv_catpv_nomg
5721
5722 These concatenate the C<NUL>-terminated string C<sstr> onto the end of the
5723 string which is in the SV.
5724 If the SV has the UTF-8 status set, then the bytes appended should be
5725 valid UTF-8.
5726
5727 They differ only in how they handle magic:
5728
5729 C<sv_catpv_mg> performs both 'get' and 'set' magic.
5730
5731 C<sv_catpv> performs only 'get' magic.
5732
5733 C<sv_catpv_nomg> skips all magic.
5734
5735 C<sv_catpv_flags> has an extra C<flags> parameter which allows you to specify
5736 any combination of magic handling (using C<SV_GMAGIC> and/or C<SV_SMAGIC>), and
5737 to also override the UTF-8 handling. By supplying the C<SV_CATUTF8> flag, the
5738 appended string is forced to be interpreted as UTF-8; by supplying instead the
5739 C<SV_CATBYTES> flag, it will be interpreted as just bytes. Either the SV or
5740 the string appended will be upgraded to UTF-8 if necessary.
5741
5742 =cut
5743 */
5744
5745 void
5746 Perl_sv_catpv(pTHX_ SV *const dsv, const char *sstr)
5747 {
5748 STRLEN len;
5749 STRLEN tlen;
5750 char *junk;
5751
5752 PERL_ARGS_ASSERT_SV_CATPV;
5753
5754 if (!sstr)
5755 return;
5756 junk = SvPV_force(dsv, tlen);
5757 len = strlen(sstr);
5758 SvGROW(dsv, tlen + len + 1);
5759 if (sstr == junk)
5760 sstr = SvPVX_const(dsv);
5761 Move(sstr,SvPVX(dsv)+tlen,len+1,char);
5762 SvCUR_set(dsv, SvCUR(dsv) + len);
5763 (void)SvPOK_only_UTF8(dsv); /* validate pointer */
5764 SvTAINT(dsv);
5765 }
5766
5767 void
5768 Perl_sv_catpv_flags(pTHX_ SV *dsv, const char *sstr, const I32 flags)
5769 {
5770 PERL_ARGS_ASSERT_SV_CATPV_FLAGS;
5771 sv_catpvn_flags(dsv, sstr, strlen(sstr), flags);
5772 }
5773
5774 void
5775 Perl_sv_catpv_mg(pTHX_ SV *const dsv, const char *const sstr)
5776 {
5777 PERL_ARGS_ASSERT_SV_CATPV_MG;
5778
5779 sv_catpv(dsv,sstr);
5780 SvSETMAGIC(dsv);
5781 }
5782
5783 /*
5784 =for apidoc newSV
5785
5786 Creates a new SV. A non-zero C<len> parameter indicates the number of
5787 bytes of preallocated string space the SV should have. An extra byte for a
5788 trailing C<NUL> is also reserved. (C<SvPOK> is not set for the SV even if string
5789 space is allocated.) The reference count for the new SV is set to 1.
5790
5791 In 5.9.3, C<newSV()> replaces the older C<NEWSV()> API, and drops the first
5792 parameter, I<x>, a debug aid which allowed callers to identify themselves.
5793 This aid has been superseded by a new build option, C<PERL_MEM_LOG> (see
5794 L<perlhacktips/PERL_MEM_LOG>). The older API is still there for use in XS
5795 modules supporting older perls.
5796
5797 =cut
5798 */
5799
5800 SV *
5801 Perl_newSV(pTHX_ const STRLEN len)
5802 {
5803 SV *sv;
5804
5805 if (!len)
5806 new_SV(sv);
5807 else {
5808 sv = newSV_type(SVt_PV);
5809 sv_grow_fresh(sv, len + 1);
5810 }
5811 return sv;
5812 }
5813 /*
5814 =for apidoc sv_magicext
5815
5816 Adds magic to an SV, upgrading it if necessary. Applies the
5817 supplied C<vtable> and returns a pointer to the magic added.
5818
5819 Note that C<sv_magicext> will allow things that C<sv_magic> will not.
5820 In particular, you can add magic to C<SvREADONLY> SVs, and add more than
5821 one instance of the same C<how>.
5822
5823 If C<namlen> is greater than zero then a C<savepvn> I<copy> of C<name> is
5824 stored, if C<namlen> is zero then C<name> is stored as-is and - as another
5825 special case - if C<(name && namlen == HEf_SVKEY)> then C<name> is assumed
5826 to contain an SV* and is stored as-is with its C<REFCNT> incremented.
5827
5828 (This is now used as a subroutine by C<sv_magic>.)
5829
5830 =cut
5831 */
5832 MAGIC *
5833 Perl_sv_magicext(pTHX_ SV *const sv, SV *const obj, const int how,
5834 const MGVTBL *const vtable, const char *const name, const I32 namlen)
5835 {
5836 MAGIC* mg;
5837
5838 PERL_ARGS_ASSERT_SV_MAGICEXT;
5839
5840 SvUPGRADE(sv, SVt_PVMG);
5841 Newxz(mg, 1, MAGIC);
5842 mg->mg_moremagic = SvMAGIC(sv);
5843 SvMAGIC_set(sv, mg);
5844
5845 /* Sometimes a magic contains a reference loop, where the sv and
5846 object refer to each other. To prevent a reference loop that
5847 would prevent such objects being freed, we look for such loops
5848 and if we find one we avoid incrementing the object refcount.
5849
5850 Note we cannot do this to avoid self-tie loops as intervening RV must
5851 have its REFCNT incremented to keep it in existence.
5852
5853 */
5854 if (!obj || obj == sv ||
5855 how == PERL_MAGIC_arylen ||
5856 how == PERL_MAGIC_regdata ||
5857 how == PERL_MAGIC_regdatum ||
5858 how == PERL_MAGIC_symtab ||
5859 (SvTYPE(obj) == SVt_PVGV &&
5860 (GvSV(obj) == sv || GvHV(obj) == (const HV *)sv
5861 || GvAV(obj) == (const AV *)sv || GvCV(obj) == (const CV *)sv
5862 || GvIOp(obj) == (const IO *)sv || GvFORM(obj) == (const CV *)sv)))
5863 {
5864 mg->mg_obj = obj;
5865 }
5866 else {
5867 mg->mg_obj = SvREFCNT_inc_simple(obj);
5868 mg->mg_flags |= MGf_REFCOUNTED;
5869 }
5870
5871 /* Normal self-ties simply pass a null object, and instead of
5872 using mg_obj directly, use the SvTIED_obj macro to produce a
5873 new RV as needed. For glob "self-ties", we are tieing the PVIO
5874 with an RV obj pointing to the glob containing the PVIO. In
5875 this case, to avoid a reference loop, we need to weaken the
5876 reference.
5877 */
5878
5879 if (how == PERL_MAGIC_tiedscalar && SvTYPE(sv) == SVt_PVIO &&
5880 obj && SvROK(obj) && GvIO(SvRV(obj)) == (const IO *)sv)
5881 {
5882 sv_rvweaken(obj);
5883 }
5884
5885 mg->mg_type = how;
5886 mg->mg_len = namlen;
5887 if (name) {
5888 if (namlen > 0)
5889 mg->mg_ptr = savepvn(name, namlen);
5890 else if (namlen == HEf_SVKEY) {
5891 /* Yes, this is casting away const. This is only for the case of
5892 HEf_SVKEY. I think we need to document this aberration of the
5893 constness of the API, rather than making name non-const, as
5894 that change propagating outwards a long way. */
5895 mg->mg_ptr = (char*)SvREFCNT_inc_simple_NN((SV *)name);
5896 } else
5897 mg->mg_ptr = (char *) name;
5898 }
5899 mg->mg_virtual = (MGVTBL *) vtable;
5900
5901 mg_magical(sv);
5902 return mg;
5903 }
5904
5905 MAGIC *
5906 Perl_sv_magicext_mglob(pTHX_ SV *sv)
5907 {
5908 PERL_ARGS_ASSERT_SV_MAGICEXT_MGLOB;
5909 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
5910 /* This sv is only a delegate. //g magic must be attached to
5911 its target. */
5912 vivify_defelem(sv);
5913 sv = LvTARG(sv);
5914 }
5915 return sv_magicext(sv, NULL, PERL_MAGIC_regex_global,
5916 &PL_vtbl_mglob, 0, 0);
5917 }
5918
5919 /*
5920 =for apidoc sv_magic
5921
5922 Adds magic to an SV. First upgrades C<sv> to type C<SVt_PVMG> if
5923 necessary, then adds a new magic item of type C<how> to the head of the
5924 magic list.
5925
5926 See C<L</sv_magicext>> (which C<sv_magic> now calls) for a description of the
5927 handling of the C<name> and C<namlen> arguments.
5928
5929 You need to use C<sv_magicext> to add magic to C<SvREADONLY> SVs and also
5930 to add more than one instance of the same C<how>.
5931
5932 =cut
5933 */
5934
5935 void
5936 Perl_sv_magic(pTHX_ SV *const sv, SV *const obj, const int how,
5937 const char *const name, const I32 namlen)
5938 {
5939 const MGVTBL *vtable;
5940 MAGIC* mg;
5941 unsigned int flags;
5942 unsigned int vtable_index;
5943
5944 PERL_ARGS_ASSERT_SV_MAGIC;
5945
5946 if (how < 0 || (unsigned)how >= C_ARRAY_LENGTH(PL_magic_data)
5947 || ((flags = PL_magic_data[how]),
5948 (vtable_index = flags & PERL_MAGIC_VTABLE_MASK)
5949 > magic_vtable_max))
5950 Perl_croak(aTHX_ "Don't know how to handle magic of type \\%o", how);
5951
5952 /* PERL_MAGIC_ext is reserved for use by extensions not perl internals.
5953 Useful for attaching extension internal data to perl vars.
5954 Note that multiple extensions may clash if magical scalars
5955 etc holding private data from one are passed to another. */
5956
5957 vtable = (vtable_index == magic_vtable_max)
5958 ? NULL : PL_magic_vtables + vtable_index;
5959
5960 if (SvREADONLY(sv)) {
5961 if (
5962 !PERL_MAGIC_TYPE_READONLY_ACCEPTABLE(how)
5963 )
5964 {
5965 Perl_croak_no_modify();
5966 }
5967 }
5968 if (SvMAGICAL(sv) || (how == PERL_MAGIC_taint && SvTYPE(sv) >= SVt_PVMG)) {
5969 if (SvMAGIC(sv) && (mg = mg_find(sv, how))) {
5970 /* sv_magic() refuses to add a magic of the same 'how' as an
5971 existing one
5972 */
5973 if (how == PERL_MAGIC_taint)
5974 mg->mg_len |= 1;
5975 return;
5976 }
5977 }
5978
5979 /* Rest of work is done else where */
5980 mg = sv_magicext(sv,obj,how,vtable,name,namlen);
5981
5982 switch (how) {
5983 case PERL_MAGIC_taint:
5984 mg->mg_len = 1;
5985 break;
5986 case PERL_MAGIC_ext:
5987 case PERL_MAGIC_dbfile:
5988 SvRMAGICAL_on(sv);
5989 break;
5990 }
5991 }
5992
5993 static int
5994 S_sv_unmagicext_flags(pTHX_ SV *const sv, const int type, const MGVTBL *vtbl, const U32 flags)
5995 {
5996 MAGIC* mg;
5997 MAGIC** mgp;
5998
5999 assert(flags <= 1);
6000
6001 if (SvTYPE(sv) < SVt_PVMG || !SvMAGIC(sv))
6002 return 0;
6003 mgp = &(((XPVMG*) SvANY(sv))->xmg_u.xmg_magic);
6004 for (mg = *mgp; mg; mg = *mgp) {
6005 const MGVTBL* const virt = mg->mg_virtual;
6006 if (mg->mg_type == type && (!flags || virt == vtbl)) {
6007 *mgp = mg->mg_moremagic;
6008 if (virt && virt->svt_free)
6009 virt->svt_free(aTHX_ sv, mg);
6010 if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) {
6011 if (mg->mg_len > 0)
6012 Safefree(mg->mg_ptr);
6013 else if (mg->mg_len == HEf_SVKEY)
6014 SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr));
6015 else if (mg->mg_type == PERL_MAGIC_utf8)
6016 Safefree(mg->mg_ptr);
6017 }
6018 if (mg->mg_flags & MGf_REFCOUNTED)
6019 SvREFCNT_dec(mg->mg_obj);
6020 Safefree(mg);
6021 }
6022 else
6023 mgp = &mg->mg_moremagic;
6024 }
6025 if (SvMAGIC(sv)) {
6026 if (SvMAGICAL(sv)) /* if we're under save_magic, wait for restore_magic; */
6027 mg_magical(sv); /* else fix the flags now */
6028 }
6029 else
6030 SvMAGICAL_off(sv);
6031
6032 return 0;
6033 }
6034
6035 /*
6036 =for apidoc sv_unmagic
6037
6038 Removes all magic of type C<type> from an SV.
6039
6040 =cut
6041 */
6042
6043 int
6044 Perl_sv_unmagic(pTHX_ SV *const sv, const int type)
6045 {
6046 PERL_ARGS_ASSERT_SV_UNMAGIC;
6047 return S_sv_unmagicext_flags(aTHX_ sv, type, NULL, 0);
6048 }
6049
6050 /*
6051 =for apidoc sv_unmagicext
6052
6053 Removes all magic of type C<type> with the specified C<vtbl> from an SV.
6054
6055 =cut
6056 */
6057
6058 int
6059 Perl_sv_unmagicext(pTHX_ SV *const sv, const int type, const MGVTBL *vtbl)
6060 {
6061 PERL_ARGS_ASSERT_SV_UNMAGICEXT;
6062 return S_sv_unmagicext_flags(aTHX_ sv, type, vtbl, 1);
6063 }
6064
6065 /*
6066 =for apidoc sv_rvweaken
6067
6068 Weaken a reference: set the C<SvWEAKREF> flag on this RV; give the
6069 referred-to SV C<PERL_MAGIC_backref> magic if it hasn't already; and
6070 push a back-reference to this RV onto the array of backreferences
6071 associated with that magic. If the RV is magical, set magic will be
6072 called after the RV is cleared. Silently ignores C<undef> and warns
6073 on already-weak references.
6074
6075 =cut
6076 */
6077
6078 SV *
6079 Perl_sv_rvweaken(pTHX_ SV *const sv)
6080 {
6081 SV *tsv;
6082
6083 PERL_ARGS_ASSERT_SV_RVWEAKEN;
6084
6085 if (!SvOK(sv)) /* let undefs pass */
6086 return sv;
6087 if (!SvROK(sv))
6088 Perl_croak(aTHX_ "Can't weaken a nonreference");
6089 else if (SvWEAKREF(sv)) {
6090 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is already weak");
6091 return sv;
6092 }
6093 else if (SvREADONLY(sv)) croak_no_modify();
6094 tsv = SvRV(sv);
6095 Perl_sv_add_backref(aTHX_ tsv, sv);
6096 SvWEAKREF_on(sv);
6097 SvREFCNT_dec_NN(tsv);
6098 return sv;
6099 }
6100
6101 /*
6102 =for apidoc sv_rvunweaken
6103
6104 Unweaken a reference: Clear the C<SvWEAKREF> flag on this RV; remove
6105 the backreference to this RV from the array of backreferences
6106 associated with the target SV, increment the refcount of the target.
6107 Silently ignores C<undef> and warns on non-weak references.
6108
6109 =cut
6110 */
6111
6112 SV *
6113 Perl_sv_rvunweaken(pTHX_ SV *const sv)
6114 {
6115 SV *tsv;
6116
6117 PERL_ARGS_ASSERT_SV_RVUNWEAKEN;
6118
6119 if (!SvOK(sv)) /* let undefs pass */
6120 return sv;
6121 if (!SvROK(sv))
6122 Perl_croak(aTHX_ "Can't unweaken a nonreference");
6123 else if (!SvWEAKREF(sv)) {
6124 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is not weak");
6125 return sv;
6126 }
6127 else if (SvREADONLY(sv)) croak_no_modify();
6128
6129 tsv = SvRV(sv);
6130 SvWEAKREF_off(sv);
6131 SvROK_on(sv);
6132 SvREFCNT_inc_NN(tsv);
6133 Perl_sv_del_backref(aTHX_ tsv, sv);
6134 return sv;
6135 }
6136
6137 /*
6138 =for apidoc sv_get_backrefs
6139
6140 If C<sv> is the target of a weak reference then it returns the back
6141 references structure associated with the sv; otherwise return C<NULL>.
6142
6143 When returning a non-null result the type of the return is relevant. If it
6144 is an AV then the elements of the AV are the weak reference RVs which
6145 point at this item. If it is any other type then the item itself is the
6146 weak reference.
6147
6148 See also C<Perl_sv_add_backref()>, C<Perl_sv_del_backref()>,
6149 C<Perl_sv_kill_backrefs()>
6150
6151 =cut
6152 */
6153
6154 SV *
6155 Perl_sv_get_backrefs(SV *const sv)
6156 {
6157 SV *backrefs= NULL;
6158
6159 PERL_ARGS_ASSERT_SV_GET_BACKREFS;
6160
6161 /* find slot to store array or singleton backref */
6162
6163 if (SvTYPE(sv) == SVt_PVHV) {
6164 if (HvHasAUX(sv)) {
6165 struct xpvhv_aux * const iter = HvAUX((HV *)sv);
6166 backrefs = (SV *)iter->xhv_backreferences;
6167 }
6168 } else if (SvMAGICAL(sv)) {
6169 MAGIC *mg = mg_find(sv, PERL_MAGIC_backref);
6170 if (mg)
6171 backrefs = mg->mg_obj;
6172 }
6173 return backrefs;
6174 }
6175
6176 /* Give tsv backref magic if it hasn't already got it, then push a
6177 * back-reference to sv onto the array associated with the backref magic.
6178 *
6179 * As an optimisation, if there's only one backref and it's not an AV,
6180 * store it directly in the HvAUX or mg_obj slot, avoiding the need to
6181 * allocate an AV. (Whether the slot holds an AV tells us whether this is
6182 * active.)
6183 */
6184
6185 /* A discussion about the backreferences array and its refcount:
6186 *
6187 * The AV holding the backreferences is pointed to either as the mg_obj of
6188 * PERL_MAGIC_backref, or in the specific case of a HV, from the
6189 * xhv_backreferences field. The array is created with a refcount
6190 * of 2. This means that if during global destruction the array gets
6191 * picked on before its parent to have its refcount decremented by the
6192 * random zapper, it won't actually be freed, meaning it's still there for
6193 * when its parent gets freed.
6194 *
6195 * When the parent SV is freed, the extra ref is killed by
6196 * Perl_sv_kill_backrefs. The other ref is killed, in the case of magic,
6197 * by mg_free() / MGf_REFCOUNTED, or for a hash, by Perl_hv_kill_backrefs.
6198 *
6199 * When a single backref SV is stored directly, it is not reference
6200 * counted.
6201 */
6202
6203 void
6204 Perl_sv_add_backref(pTHX_ SV *const tsv, SV *const sv)
6205 {
6206 SV **svp;
6207 AV *av = NULL;
6208 MAGIC *mg = NULL;
6209
6210 PERL_ARGS_ASSERT_SV_ADD_BACKREF;
6211
6212 /* find slot to store array or singleton backref */
6213
6214 if (SvTYPE(tsv) == SVt_PVHV) {
6215 svp = (SV**)Perl_hv_backreferences_p(aTHX_ MUTABLE_HV(tsv));
6216 } else {
6217 if (SvMAGICAL(tsv))
6218 mg = mg_find(tsv, PERL_MAGIC_backref);
6219 if (!mg)
6220 mg = sv_magicext(tsv, NULL, PERL_MAGIC_backref, &PL_vtbl_backref, NULL, 0);
6221 svp = &(mg->mg_obj);
6222 }
6223
6224 /* create or retrieve the array */
6225
6226 if ( (!*svp && SvTYPE(sv) == SVt_PVAV)
6227 || (*svp && SvTYPE(*svp) != SVt_PVAV)
6228 ) {
6229 /* create array */
6230 if (mg)
6231 mg->mg_flags |= MGf_REFCOUNTED;
6232 av = newAV();
6233 AvREAL_off(av);
6234 SvREFCNT_inc_simple_void_NN(av);
6235 /* av now has a refcnt of 2; see discussion above */
6236 av_extend(av, *svp ? 2 : 1);
6237 if (*svp) {
6238 /* move single existing backref to the array */
6239 AvARRAY(av)[++AvFILLp(av)] = *svp; /* av_push() */
6240 }
6241 *svp = (SV*)av;
6242 }
6243 else {
6244 av = MUTABLE_AV(*svp);
6245 if (!av) {
6246 /* optimisation: store single backref directly in HvAUX or mg_obj */
6247 *svp = sv;
6248 return;
6249 }
6250 assert(SvTYPE(av) == SVt_PVAV);
6251 if (AvFILLp(av) >= AvMAX(av)) {
6252 av_extend(av, AvFILLp(av)+1);
6253 }
6254 }
6255 /* push new backref */
6256 AvARRAY(av)[++AvFILLp(av)] = sv; /* av_push() */
6257 }
6258
6259 /* delete a back-reference to ourselves from the backref magic associated
6260 * with the SV we point to.
6261 */
6262
6263 void
6264 Perl_sv_del_backref(pTHX_ SV *const tsv, SV *const sv)
6265 {
6266 SV **svp = NULL;
6267
6268 PERL_ARGS_ASSERT_SV_DEL_BACKREF;
6269
6270 if (SvTYPE(tsv) == SVt_PVHV) {
6271 if (HvHasAUX(tsv))
6272 svp = (SV**)Perl_hv_backreferences_p(aTHX_ MUTABLE_HV(tsv));
6273 }
6274 else if (SvIS_FREED(tsv) && PL_phase == PERL_PHASE_DESTRUCT) {
6275 /* It's possible for the last (strong) reference to tsv to have
6276 become freed *before* the last thing holding a weak reference.
6277 If both survive longer than the backreferences array, then when
6278 the referent's reference count drops to 0 and it is freed, it's
6279 not able to chase the backreferences, so they aren't NULLed.
6280
6281 For example, a CV holds a weak reference to its stash. If both the
6282 CV and the stash survive longer than the backreferences array,
6283 and the CV gets picked for the SvBREAK() treatment first,
6284 *and* it turns out that the stash is only being kept alive because
6285 of an our variable in the pad of the CV, then midway during CV
6286 destruction the stash gets freed, but CvSTASH() isn't set to NULL.
6287 It ends up pointing to the freed HV. Hence it's chased in here, and
6288 if this block wasn't here, it would hit the !svp panic just below.
6289
6290 I don't believe that "better" destruction ordering is going to help
6291 here - during global destruction there's always going to be the
6292 chance that something goes out of order. We've tried to make it
6293 foolproof before, and it only resulted in evolutionary pressure on
6294 fools. Which made us look foolish for our hubris. :-(
6295 */
6296 return;
6297 }
6298 else {
6299 MAGIC *const mg
6300 = SvMAGICAL(tsv) ? mg_find(tsv, PERL_MAGIC_backref) : NULL;
6301 svp = mg ? &(mg->mg_obj) : NULL;
6302 }
6303
6304 if (!svp)
6305 Perl_croak(aTHX_ "panic: del_backref, svp=0");
6306 if (!*svp) {
6307 /* It's possible that sv is being freed recursively part way through the
6308 freeing of tsv. If this happens, the backreferences array of tsv has
6309 already been freed, and so svp will be NULL. If this is the case,
6310 we should not panic. Instead, nothing needs doing, so return. */
6311 if (PL_phase == PERL_PHASE_DESTRUCT && SvREFCNT(tsv) == 0)
6312 return;
6313 Perl_croak(aTHX_ "panic: del_backref, *svp=%p phase=%s refcnt=%" UVuf,
6314 (void*)*svp, PL_phase_names[PL_phase], (UV)SvREFCNT(tsv));
6315 }
6316
6317 if (SvTYPE(*svp) == SVt_PVAV) {
6318 #ifdef DEBUGGING
6319 int count = 1;
6320 #endif
6321 AV * const av = (AV*)*svp;
6322 SSize_t fill;
6323 assert(!SvIS_FREED(av));
6324 fill = AvFILLp(av);
6325 assert(fill > -1);
6326 svp = AvARRAY(av);
6327 /* for an SV with N weak references to it, if all those
6328 * weak refs are deleted, then sv_del_backref will be called
6329 * N times and O(N^2) compares will be done within the backref
6330 * array. To ameliorate this potential slowness, we:
6331 * 1) make sure this code is as tight as possible;
6332 * 2) when looking for SV, look for it at both the head and tail of the
6333 * array first before searching the rest, since some create/destroy
6334 * patterns will cause the backrefs to be freed in order.
6335 */
6336 if (*svp == sv) {
6337 AvARRAY(av)++;
6338 AvMAX(av)--;
6339 }
6340 else {
6341 SV **p = &svp[fill];
6342 SV *const topsv = *p;
6343 if (topsv != sv) {
6344 #ifdef DEBUGGING
6345 count = 0;
6346 #endif
6347 while (--p > svp) {
6348 if (*p == sv) {
6349 /* We weren't the last entry.
6350 An unordered list has this property that you
6351 can take the last element off the end to fill
6352 the hole, and it's still an unordered list :-)
6353 */
6354 *p = topsv;
6355 #ifdef DEBUGGING
6356 count++;
6357 #else
6358 break; /* should only be one */
6359 #endif
6360 }
6361 }
6362 }
6363 }
6364 assert(count ==1);
6365 AvFILLp(av) = fill-1;
6366 }
6367 else if (SvIS_FREED(*svp) && PL_phase == PERL_PHASE_DESTRUCT) {
6368 /* freed AV; skip */
6369 }
6370 else {
6371 /* optimisation: only a single backref, stored directly */
6372 if (*svp != sv)
6373 Perl_croak(aTHX_ "panic: del_backref, *svp=%p, sv=%p",
6374 (void*)*svp, (void*)sv);
6375 *svp = NULL;
6376 }
6377
6378 }
6379
6380 void
6381 Perl_sv_kill_backrefs(pTHX_ SV *const sv, AV *const av)
6382 {
6383 SV **svp;
6384 SV **last;
6385 bool is_array;
6386
6387 PERL_ARGS_ASSERT_SV_KILL_BACKREFS;
6388
6389 if (!av)
6390 return;
6391
6392 /* after multiple passes through Perl_sv_clean_all() for a thingy
6393 * that has badly leaked, the backref array may have gotten freed,
6394 * since we only protect it against 1 round of cleanup */
6395 if (SvIS_FREED(av)) {
6396 if (PL_in_clean_all) /* All is fair */
6397 return;
6398 Perl_croak(aTHX_
6399 "panic: magic_killbackrefs (freed backref AV/SV)");
6400 }
6401
6402
6403 is_array = (SvTYPE(av) == SVt_PVAV);
6404 if (is_array) {
6405 assert(!SvIS_FREED(av));
6406 svp = AvARRAY(av);
6407 if (svp)
6408 last = svp + AvFILLp(av);
6409 }
6410 else {
6411 /* optimisation: only a single backref, stored directly */
6412 svp = (SV**)&av;
6413 last = svp;
6414 }
6415
6416 if (svp) {
6417 while (svp <= last) {
6418 if (*svp) {
6419 SV *const referrer = *svp;
6420 if (SvWEAKREF(referrer)) {
6421 /* XXX Should we check that it hasn't changed? */
6422 assert(SvROK(referrer));
6423 SvRV_set(referrer, 0);
6424 SvOK_off(referrer);
6425 SvWEAKREF_off(referrer);
6426 SvSETMAGIC(referrer);
6427 } else if (SvTYPE(referrer) == SVt_PVGV ||
6428 SvTYPE(referrer) == SVt_PVLV) {
6429 assert(SvTYPE(sv) == SVt_PVHV); /* stash backref */
6430 /* You lookin' at me? */
6431 assert(GvSTASH(referrer));
6432 assert(GvSTASH(referrer) == (const HV *)sv);
6433 GvSTASH(referrer) = 0;
6434 } else if (SvTYPE(referrer) == SVt_PVCV ||
6435 SvTYPE(referrer) == SVt_PVFM) {
6436 if (SvTYPE(sv) == SVt_PVHV) { /* stash backref */
6437 /* You lookin' at me? */
6438 assert(CvSTASH(referrer));
6439 assert(CvSTASH(referrer) == (const HV *)sv);
6440 SvANY(MUTABLE_CV(referrer))->xcv_stash = 0;
6441 }
6442 else {
6443 assert(SvTYPE(sv) == SVt_PVGV);
6444 /* You lookin' at me? */
6445 assert(CvGV(referrer));
6446 assert(CvGV(referrer) == (const GV *)sv);
6447 anonymise_cv_maybe(MUTABLE_GV(sv),
6448 MUTABLE_CV(referrer));
6449 }
6450
6451 } else {
6452 Perl_croak(aTHX_
6453 "panic: magic_killbackrefs (flags=%" UVxf ")",
6454 (UV)SvFLAGS(referrer));
6455 }
6456
6457 if (is_array)
6458 *svp = NULL;
6459 }
6460 svp++;
6461 }
6462 }
6463 if (is_array) {
6464 AvFILLp(av) = -1;
6465 SvREFCNT_dec_NN(av); /* remove extra count added by sv_add_backref() */
6466 }
6467 return;
6468 }
6469
6470 /*
6471 =for apidoc sv_insert
6472 =for apidoc_item sv_insert_flags
6473
6474 These insert and/or replace a string at the specified offset/length within the
6475 SV. Similar to the Perl C<substr()> function, with C<littlelen> bytes starting
6476 at C<little> replacing C<len> bytes of the string in C<bigstr> starting at
6477 C<offset>. They handle get magic.
6478
6479 C<sv_insert_flags> is identical to plain C<sv_insert>, but the extra C<flags>
6480 are passed to the C<SvPV_force_flags> operation that is internally applied to
6481 C<bigstr>.
6482
6483 =cut
6484 */
6485
6486 void
6487 Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *little, const STRLEN littlelen, const U32 flags)
6488 {
6489 char *big;
6490 char *mid;
6491 char *midend;
6492 char *bigend;
6493 SSize_t i; /* better be sizeof(STRLEN) or bad things happen */
6494 STRLEN curlen;
6495
6496 PERL_ARGS_ASSERT_SV_INSERT_FLAGS;
6497
6498 SvPV_force_flags(bigstr, curlen, flags);
6499 (void)SvPOK_only_UTF8(bigstr);
6500
6501 if (little >= SvPVX(bigstr) &&
6502 little < SvPVX(bigstr) + (SvLEN(bigstr) ? SvLEN(bigstr) : SvCUR(bigstr))) {
6503 /* little is a pointer to within bigstr, since we can reallocate bigstr,
6504 or little...little+littlelen might overlap offset...offset+len we make a copy
6505 */
6506 little = savepvn(little, littlelen);
6507 SAVEFREEPV(little);
6508 }
6509
6510 if (offset + len > curlen) {
6511 SvGROW(bigstr, offset+len+1);
6512 Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char);
6513 SvCUR_set(bigstr, offset+len);
6514 }
6515
6516 SvTAINT(bigstr);
6517 i = littlelen - len;
6518 if (i > 0) { /* string might grow */
6519 big = SvGROW(bigstr, SvCUR(bigstr) + i + 1);
6520 mid = big + offset + len;
6521 midend = bigend = big + SvCUR(bigstr);
6522 bigend += i;
6523 *bigend = '\0';
6524 while (midend > mid) /* shove everything down */
6525 *--bigend = *--midend;
6526 Move(little,big+offset,littlelen,char);
6527 SvCUR_set(bigstr, SvCUR(bigstr) + i);
6528 SvSETMAGIC(bigstr);
6529 return;
6530 }
6531 else if (i == 0) {
6532 Move(little,SvPVX(bigstr)+offset,len,char);
6533 SvSETMAGIC(bigstr);
6534 return;
6535 }
6536
6537 big = SvPVX(bigstr);
6538 mid = big + offset;
6539 midend = mid + len;
6540 bigend = big + SvCUR(bigstr);
6541
6542 if (midend > bigend)
6543 Perl_croak(aTHX_ "panic: sv_insert, midend=%p, bigend=%p",
6544 midend, bigend);
6545
6546 if (mid - big > bigend - midend) { /* faster to shorten from end */
6547 if (littlelen) {
6548 Move(little, mid, littlelen,char);
6549 mid += littlelen;
6550 }
6551 i = bigend - midend;
6552 if (i > 0) {
6553 Move(midend, mid, i,char);
6554 mid += i;
6555 }
6556 *mid = '\0';
6557 SvCUR_set(bigstr, mid - big);
6558 }
6559 else if ((i = mid - big)) { /* faster from front */
6560 midend -= littlelen;
6561 mid = midend;
6562 Move(big, midend - i, i, char);
6563 sv_chop(bigstr,midend-i);
6564 if (littlelen)
6565 Move(little, mid, littlelen,char);
6566 }
6567 else if (littlelen) {
6568 midend -= littlelen;
6569 sv_chop(bigstr,midend);
6570 Move(little,midend,littlelen,char);
6571 }
6572 else {
6573 sv_chop(bigstr,midend);
6574 }
6575 SvSETMAGIC(bigstr);
6576 }
6577
6578 /*
6579 =for apidoc sv_replace
6580
6581 Make the first argument a copy of the second, then delete the original.
6582 The target SV physically takes over ownership of the body of the source SV
6583 and inherits its flags; however, the target keeps any magic it owns,
6584 and any magic in the source is discarded.
6585 Note that this is a rather specialist SV copying operation; most of the
6586 time you'll want to use C<sv_setsv> or one of its many macro front-ends.
6587
6588 =cut
6589 */
6590
6591 void
6592 Perl_sv_replace(pTHX_ SV *const sv, SV *const nsv)
6593 {
6594 const U32 refcnt = SvREFCNT(sv);
6595
6596 PERL_ARGS_ASSERT_SV_REPLACE;
6597
6598 SV_CHECK_THINKFIRST_COW_DROP(sv);
6599 if (SvREFCNT(nsv) != 1) {
6600 Perl_croak(aTHX_ "panic: reference miscount on nsv in sv_replace()"
6601 " (%" UVuf " != 1)", (UV) SvREFCNT(nsv));
6602 }
6603 if (SvMAGICAL(sv)) {
6604 if (SvMAGICAL(nsv))
6605 mg_free(nsv);
6606 else
6607 sv_upgrade(nsv, SVt_PVMG);
6608 SvMAGIC_set(nsv, SvMAGIC(sv));
6609 SvFLAGS(nsv) |= SvMAGICAL(sv);
6610 SvMAGICAL_off(sv);
6611 SvMAGIC_set(sv, NULL);
6612 }
6613 SvREFCNT(sv) = 0;
6614 sv_clear(sv);
6615 assert(!SvREFCNT(sv));
6616 #ifdef DEBUG_LEAKING_SCALARS
6617 sv->sv_flags = nsv->sv_flags;
6618 sv->sv_any = nsv->sv_any;
6619 sv->sv_refcnt = nsv->sv_refcnt;
6620 sv->sv_u = nsv->sv_u;
6621 #else
6622 StructCopy(nsv,sv,SV);
6623 #endif
6624 if(SvTYPE(sv) == SVt_IV) {
6625 SET_SVANY_FOR_BODYLESS_IV(sv);
6626 }
6627
6628
6629 SvREFCNT(sv) = refcnt;
6630 SvFLAGS(nsv) |= SVTYPEMASK; /* Mark as freed */
6631 SvREFCNT(nsv) = 0;
6632 del_SV(nsv);
6633 }
6634
6635 /* We're about to free a GV which has a CV that refers back to us.
6636 * If that CV will outlive us, make it anonymous (i.e. fix up its CvGV
6637 * field) */
6638
6639 STATIC void
6640 S_anonymise_cv_maybe(pTHX_ GV *gv, CV* cv)
6641 {
6642 SV *gvname;
6643 GV *anongv;
6644
6645 PERL_ARGS_ASSERT_ANONYMISE_CV_MAYBE;
6646
6647 /* be assertive! */
6648 assert(SvREFCNT(gv) == 0);
6649 assert(isGV(gv) && isGV_with_GP(gv));
6650 assert(GvGP(gv));
6651 assert(!CvANON(cv));
6652 assert(CvGV(cv) == gv);
6653 assert(!CvNAMED(cv));
6654
6655 /* will the CV shortly be freed by gp_free() ? */
6656 if (GvCV(gv) == cv && GvGP(gv)->gp_refcnt < 2 && SvREFCNT(cv) < 2) {
6657 SvANY(cv)->xcv_gv_u.xcv_gv = NULL;
6658 return;
6659 }
6660
6661 /* if not, anonymise: */
6662 gvname = (GvSTASH(gv) && HvHasNAME(GvSTASH(gv)) && HvHasENAME(GvSTASH(gv)))
6663 ? newSVhek(HvENAME_HEK(GvSTASH(gv)))
6664 : newSVpvn_flags( "__ANON__", 8, 0 );
6665 sv_catpvs(gvname, "::__ANON__");
6666 anongv = gv_fetchsv(gvname, GV_ADDMULTI, SVt_PVCV);
6667 SvREFCNT_dec_NN(gvname);
6668
6669 CvANON_on(cv);
6670 CvCVGV_RC_on(cv);
6671 SvANY(cv)->xcv_gv_u.xcv_gv = MUTABLE_GV(SvREFCNT_inc(anongv));
6672 }
6673
6674
6675 /*
6676 =for apidoc sv_clear
6677
6678 Clear an SV: call any destructors, free up any memory used by the body,
6679 and free the body itself. The SV's head is I<not> freed, although
6680 its type is set to all 1's so that it won't inadvertently be assumed
6681 to be live during global destruction etc.
6682 This function should only be called when C<REFCNT> is zero. Most of the time
6683 you'll want to call C<SvREFCNT_dec> instead.
6684
6685 =cut
6686 */
6687
6688 void
6689 Perl_sv_clear(pTHX_ SV *const orig_sv)
6690 {
6691 SV* iter_sv = NULL;
6692 SV* next_sv = NULL;
6693 SV *sv = orig_sv;
6694 STRLEN hash_index = 0; /* initialise to make Coverity et al happy.
6695 Not strictly necessary */
6696
6697 PERL_ARGS_ASSERT_SV_CLEAR;
6698
6699 /* within this loop, sv is the SV currently being freed, and
6700 * iter_sv is the most recent AV or whatever that's being iterated
6701 * over to provide more SVs */
6702
6703 while (sv) {
6704 U32 type = SvTYPE(sv);
6705 HV *stash;
6706
6707 assert(SvREFCNT(sv) == 0);
6708 assert(!SvIS_FREED(sv));
6709 #if NVSIZE <= IVSIZE
6710 if (type <= SVt_NV) {
6711 #else
6712 if (type <= SVt_IV) {
6713 #endif
6714 /* Historically this check on type was needed so that the code to
6715 * free bodies wasn't reached for these types, because the arena
6716 * slots were re-used for HEs and pointer table entries. The
6717 * metadata table `bodies_by_type` had the information for the sizes
6718 * for HEs and PTEs, hence the code here had to have a special-case
6719 * check to ensure that the "regular" body freeing code wasn't
6720 * reached, and get confused by the "lies" in `bodies_by_type`.
6721 *
6722 * However, it hasn't actually been needed for that reason since
6723 * Aug 2010 (commit 829cd18aa7f45221), because `bodies_by_type` was
6724 * changed to always hold the accurate metadata for the SV types.
6725 * This was possible because PTEs were no longer allocated from the
6726 * "SVt_IV" arena, and the code to allocate HEs from the "SVt_NULL"
6727 * arena is entirely in hv.c, so doesn't access the table.
6728 *
6729 * Some sort of check is still needed to handle SVt_IVs - pure RVs
6730 * need to take one code path which is common with RVs stored in
6731 * SVt_PV (or larger), but pure IVs mustn't take the "PV but not RV"
6732 * path, as SvPVX() doesn't point to valid memory.
6733 *
6734 * Hence this code is still the most efficient way to handle this.
6735 *
6736 * Additionally, for bodyless NVs, riding this branch is more
6737 * efficient than stepping through the general logic.
6738 */
6739
6740 if (SvROK(sv))
6741 goto free_rv;
6742 SvFLAGS(sv) &= SVf_BREAK;
6743 SvFLAGS(sv) |= SVTYPEMASK;
6744 goto free_head;
6745 }
6746
6747 /* objs are always >= MG, but pad names use the SVs_OBJECT flag
6748 for another purpose */
6749 assert(!SvOBJECT(sv) || type >= SVt_PVMG);
6750
6751 if (type >= SVt_PVMG) {
6752 if (SvOBJECT(sv)) {
6753 if (!curse(sv, 1)) goto get_next_sv;
6754 type = SvTYPE(sv); /* destructor may have changed it */
6755 }
6756 /* Free back-references before magic, in case the magic calls
6757 * Perl code that has weak references to sv. */
6758 if (type == SVt_PVHV) {
6759 Perl_hv_kill_backrefs(aTHX_ MUTABLE_HV(sv));
6760 if (SvMAGIC(sv))
6761 mg_free(sv);
6762 }
6763 else if (SvMAGIC(sv)) {
6764 /* Free back-references before other types of magic. */
6765 sv_unmagic(sv, PERL_MAGIC_backref);
6766 mg_free(sv);
6767 }
6768 SvMAGICAL_off(sv);
6769 }
6770 switch (type) {
6771 /* case SVt_INVLIST: */
6772 case SVt_PVIO:
6773 if (IoIFP(sv) &&
6774 IoIFP(sv) != PerlIO_stdin() &&
6775 IoIFP(sv) != PerlIO_stdout() &&
6776 IoIFP(sv) != PerlIO_stderr() &&
6777 !(IoFLAGS(sv) & IOf_FAKE_DIRP))
6778 {
6779 io_close(MUTABLE_IO(sv), NULL, FALSE,
6780 (IoTYPE(sv) == IoTYPE_WRONLY ||
6781 IoTYPE(sv) == IoTYPE_RDWR ||
6782 IoTYPE(sv) == IoTYPE_APPEND));
6783 }
6784 if (IoDIRP(sv) && !(IoFLAGS(sv) & IOf_FAKE_DIRP))
6785 PerlDir_close(IoDIRP(sv));
6786 IoDIRP(sv) = (DIR*)NULL;
6787 Safefree(IoTOP_NAME(sv));
6788 Safefree(IoFMT_NAME(sv));
6789 Safefree(IoBOTTOM_NAME(sv));
6790 if ((const GV *)sv == PL_statgv)
6791 PL_statgv = NULL;
6792 goto freescalar;
6793 case SVt_REGEXP:
6794 /* FIXME for plugins */
6795 pregfree2((REGEXP*) sv);
6796 goto freescalar;
6797 case SVt_PVCV:
6798 case SVt_PVFM:
6799 cv_undef(MUTABLE_CV(sv));
6800 /* If we're in a stash, we don't own a reference to it.
6801 * However it does have a back reference to us, which needs to
6802 * be cleared. */
6803 if ((stash = CvSTASH(sv)))
6804 sv_del_backref(MUTABLE_SV(stash), sv);
6805 goto freescalar;
6806 case SVt_PVHV:
6807 if (HvTOTALKEYS((HV*)sv) > 0) {
6808 const HEK *hek;
6809 /* this statement should match the one at the beginning of
6810 * hv_undef_flags() */
6811 if ( PL_phase != PERL_PHASE_DESTRUCT
6812 && (hek = HvNAME_HEK((HV*)sv)))
6813 {
6814 if (PL_stashcache) {
6815 DEBUG_o(Perl_deb(aTHX_
6816 "sv_clear clearing PL_stashcache for '%" HEKf
6817 "'\n",
6818 HEKfARG(hek)));
6819 (void)hv_deletehek(PL_stashcache,
6820 hek, G_DISCARD);
6821 }
6822 hv_name_set((HV*)sv, NULL, 0, 0);
6823 }
6824
6825 /* save old iter_sv in unused SvSTASH field */
6826 assert(!SvOBJECT(sv));
6827 SvSTASH(sv) = (HV*)iter_sv;
6828 iter_sv = sv;
6829
6830 /* save old hash_index in unused SvMAGIC field */
6831 assert(!SvMAGICAL(sv));
6832 assert(!SvMAGIC(sv));
6833 ((XPVMG*) SvANY(sv))->xmg_u.xmg_hash_index = hash_index;
6834 hash_index = 0;
6835
6836 next_sv = Perl_hfree_next_entry(aTHX_ (HV*)sv, &hash_index);
6837 goto get_next_sv; /* process this new sv */
6838 }
6839 /* free empty hash */
6840 Perl_hv_undef_flags(aTHX_ MUTABLE_HV(sv), HV_NAME_SETALL);
6841 assert(!HvARRAY((HV*)sv));
6842 break;
6843 case SVt_PVAV:
6844 {
6845 AV* av = MUTABLE_AV(sv);
6846 if (PL_comppad == av) {
6847 PL_comppad = NULL;
6848 PL_curpad = NULL;
6849 }
6850 if (AvREAL(av) && AvFILLp(av) > -1) {
6851 next_sv = AvARRAY(av)[AvFILLp(av)--];
6852 /* save old iter_sv in top-most slot of AV,
6853 * and pray that it doesn't get wiped in the meantime */
6854 AvARRAY(av)[AvMAX(av)] = iter_sv;
6855 iter_sv = sv;
6856 goto get_next_sv; /* process this new sv */
6857 }
6858 Safefree(AvALLOC(av));
6859 }
6860
6861 break;
6862 case SVt_PVOBJ:
6863 if(ObjectMAXFIELD(sv) > -1) {
6864 next_sv = ObjectFIELDS(sv)[ObjectMAXFIELD(sv)--];
6865 /* save old iter_sv in top-most field, and pray that it
6866 * doesn't get wiped in the meantime */
6867 ObjectFIELDS(sv)[(ObjectITERSVAT(sv) = ObjectMAXFIELD(sv) + 1)] = iter_sv;
6868 iter_sv = sv;
6869 goto get_next_sv;
6870 }
6871 Safefree(ObjectFIELDS(sv));
6872 break;
6873 case SVt_PVLV:
6874 if (LvTYPE(sv) == 'T') { /* for tie: return HE to pool */
6875 SvREFCNT_dec(HeKEY_sv((HE*)LvTARG(sv)));
6876 HeNEXT((HE*)LvTARG(sv)) = PL_hv_fetch_ent_mh;
6877 PL_hv_fetch_ent_mh = (HE*)LvTARG(sv);
6878 }
6879 else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */
6880 SvREFCNT_dec(LvTARG(sv));
6881 if (isREGEXP(sv)) {
6882 /* This PVLV has had a REGEXP assigned to it - the memory
6883 * normally used to store SvLEN instead points to a regex body.
6884 * Retrieving the pointer to the regex body from the correct
6885 * location is normally abstracted by ReANY(), which handles
6886 * both SVt_PVLV and SVt_REGEXP
6887 *
6888 * This code is unwinding the storage specific to SVt_PVLV.
6889 * We get the body pointer directly from the union, free it,
6890 * then set SvLEN to whatever value was in the now-freed regex
6891 * body. The PVX buffer is shared by multiple re's and only
6892 * freed once, by the re whose SvLEN is non-null.
6893 *
6894 * Perl_sv_force_normal_flags() also has code to free this
6895 * hidden body - it swaps the body into a temporary SV it has
6896 * just allocated, then frees that SV. That causes execution
6897 * to reach the SVt_REGEXP: case about 60 lines earlier in this
6898 * function.
6899 *
6900 * See Perl_reg_temp_copy() for the code that sets up this
6901 * REGEXP body referenced by the PVLV. */
6902 struct regexp *r = ((XPV*)SvANY(sv))->xpv_len_u.xpvlenu_rx;
6903 STRLEN len = r->xpv_len;
6904 pregfree2((REGEXP*) sv);
6905 del_body_by_type(r, SVt_REGEXP);
6906 SvLEN_set((sv), len);
6907 goto freescalar;
6908 }
6909 /* FALLTHROUGH */
6910 case SVt_PVGV:
6911 if (isGV_with_GP(sv)) {
6912 if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv)))
6913 && HvHasENAME(stash))
6914 mro_method_changed_in(stash);
6915 gp_free(MUTABLE_GV(sv));
6916 if (GvNAME_HEK(sv))
6917 unshare_hek(GvNAME_HEK(sv));
6918 /* If we're in a stash, we don't own a reference to it.
6919 * However it does have a back reference to us, which
6920 * needs to be cleared. */
6921 if ((stash = GvSTASH(sv)))
6922 sv_del_backref(MUTABLE_SV(stash), sv);
6923 }
6924 /* FIXME. There are probably more unreferenced pointers to SVs
6925 * in the interpreter struct that we should check and tidy in
6926 * a similar fashion to this: */
6927 /* See also S_sv_unglob, which does the same thing. */
6928 if ((const GV *)sv == PL_last_in_gv)
6929 PL_last_in_gv = NULL;
6930 else if ((const GV *)sv == PL_statgv)
6931 PL_statgv = NULL;
6932 else if ((const GV *)sv == PL_stderrgv)
6933 PL_stderrgv = NULL;
6934 /* FALLTHROUGH */
6935 case SVt_PVMG:
6936 case SVt_PVNV:
6937 case SVt_PVIV:
6938 case SVt_INVLIST:
6939 case SVt_PV:
6940 freescalar:
6941 /* Don't bother with SvOOK_off(sv); as we're only going to
6942 * free it. */
6943 if (SvOOK(sv)) {
6944 STRLEN offset;
6945 SvOOK_offset(sv, offset);
6946 SvPV_set(sv, SvPVX_mutable(sv) - offset);
6947 /* Don't even bother with turning off the OOK flag. */
6948 }
6949 if (SvROK(sv)) {
6950 free_rv:
6951 {
6952 SV * const target = SvRV(sv);
6953 if (SvWEAKREF(sv))
6954 sv_del_backref(target, sv);
6955 else
6956 next_sv = target;
6957 }
6958 }
6959 #ifdef PERL_ANY_COW
6960 else if (SvPVX_const(sv)
6961 && !(SvTYPE(sv) == SVt_PVIO
6962 && !(IoFLAGS(sv) & IOf_FAKE_DIRP)))
6963 {
6964 if (SvIsCOW(sv)) {
6965 #ifdef DEBUGGING
6966 if (DEBUG_C_TEST) {
6967 PerlIO_printf(Perl_debug_log, "Copy on write: clear\n");
6968 sv_dump(sv);
6969 }
6970 #endif
6971 if (SvIsCOW_static(sv)) {
6972 SvLEN_set(sv, 0);
6973 }
6974 else if (SvIsCOW_shared_hash(sv)) {
6975 unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv)));
6976 }
6977 else {
6978 if (CowREFCNT(sv)) {
6979 sv_buf_to_rw(sv);
6980 CowREFCNT(sv)--;
6981 sv_buf_to_ro(sv);
6982 SvLEN_set(sv, 0);
6983 }
6984 }
6985 }
6986 if (SvLEN(sv)) {
6987 Safefree(SvPVX_mutable(sv));
6988 }
6989 }
6990 #else
6991 else if (SvPVX_const(sv) && SvLEN(sv)
6992 && !(SvTYPE(sv) == SVt_PVIO
6993 && !(IoFLAGS(sv) & IOf_FAKE_DIRP)))
6994 Safefree(SvPVX_mutable(sv));
6995 else if (SvPVX_const(sv) && SvIsCOW(sv)) {
6996 unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv)));
6997 }
6998 #endif
6999 break;
7000 case SVt_NV:
7001 break;
7002 }
7003
7004 free_body:
7005
7006 {
7007 U32 arena_index;
7008 const struct body_details *sv_type_details;
7009
7010 if (type == SVt_PVHV && HvHasAUX(sv)) {
7011 arena_index = HVAUX_ARENA_ROOT_IX;
7012 sv_type_details = &fake_hv_with_aux;
7013 }
7014 else {
7015 arena_index = type;
7016 sv_type_details = bodies_by_type + arena_index;
7017 }
7018
7019 SvFLAGS(sv) &= SVf_BREAK;
7020 SvFLAGS(sv) |= SVTYPEMASK;
7021
7022 if (sv_type_details->arena) {
7023 del_body(((char *)SvANY(sv) + sv_type_details->offset),
7024 &PL_body_roots[arena_index]);
7025 }
7026 else if (sv_type_details->body_size) {
7027 safefree(SvANY(sv));
7028 }
7029 }
7030
7031 free_head:
7032 /* caller is responsible for freeing the head of the original sv */
7033 if (sv != orig_sv && !SvREFCNT(sv))
7034 del_SV(sv);
7035
7036 /* grab and free next sv, if any */
7037 get_next_sv:
7038 while (1) {
7039 sv = NULL;
7040 if (next_sv) {
7041 sv = next_sv;
7042 next_sv = NULL;
7043 }
7044 else if (!iter_sv) {
7045 break;
7046 } else if (SvTYPE(iter_sv) == SVt_PVAV) {
7047 AV *const av = (AV*)iter_sv;
7048 if (AvFILLp(av) > -1) {
7049 sv = AvARRAY(av)[AvFILLp(av)--];
7050 }
7051 else { /* no more elements of current AV to free */
7052 sv = iter_sv;
7053 type = SvTYPE(sv);
7054 /* restore previous value, squirrelled away */
7055 iter_sv = AvARRAY(av)[AvMAX(av)];
7056 Safefree(AvALLOC(av));
7057 goto free_body;
7058 }
7059 } else if (SvTYPE(iter_sv) == SVt_PVOBJ) {
7060 if (ObjectMAXFIELD(iter_sv) > -1) {
7061 sv = ObjectFIELDS(iter_sv)[ObjectMAXFIELD(iter_sv)--];
7062 }
7063 else { /* no more fields in the current SV to free */
7064 sv = iter_sv;
7065 type = SvTYPE(sv);
7066 iter_sv = ObjectFIELDS(sv)[ObjectITERSVAT(sv)];
7067 Safefree(ObjectFIELDS(sv));
7068 goto free_body;
7069 }
7070 } else if (SvTYPE(iter_sv) == SVt_PVHV) {
7071 sv = Perl_hfree_next_entry(aTHX_ (HV*)iter_sv, &hash_index);
7072 if (!sv && !HvTOTALKEYS((HV *)iter_sv)) {
7073 /* no more elements of current HV to free */
7074 sv = iter_sv;
7075 type = SvTYPE(sv);
7076 /* Restore previous values of iter_sv and hash_index,
7077 * squirrelled away */
7078 assert(!SvOBJECT(sv));
7079 iter_sv = (SV*)SvSTASH(sv);
7080 assert(!SvMAGICAL(sv));
7081 hash_index = ((XPVMG*) SvANY(sv))->xmg_u.xmg_hash_index;
7082 #ifdef DEBUGGING
7083 /* perl -DA does not like rubbish in SvMAGIC. */
7084 SvMAGIC_set(sv, 0);
7085 #endif
7086
7087 /* free any remaining detritus from the hash struct */
7088 Perl_hv_undef_flags(aTHX_ MUTABLE_HV(sv), HV_NAME_SETALL);
7089 assert(!HvARRAY((HV*)sv));
7090 goto free_body;
7091 }
7092 }
7093
7094 /* unrolled SvREFCNT_dec and sv_free2 follows: */
7095
7096 if (!sv)
7097 continue;
7098 if (!SvREFCNT(sv)) {
7099 sv_free(sv);
7100 continue;
7101 }
7102 if (--(SvREFCNT(sv)))
7103 continue;
7104 if (SvIMMORTAL(sv)) {
7105 /* make sure SvREFCNT(sv)==0 happens very seldom */
7106 SvREFCNT(sv) = SvREFCNT_IMMORTAL;
7107 SvTEMP_off(sv);
7108 continue;
7109 }
7110 #ifdef DEBUGGING
7111 if (SvTEMP(sv)) {
7112 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING),
7113 "Attempt to free temp prematurely: SV 0x%" UVxf
7114 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE);
7115 continue;
7116 }
7117 #endif
7118 break;
7119 } /* while 1 */
7120
7121 } /* while sv */
7122 }
7123
7124 /* This routine curses the sv itself, not the object referenced by sv. So
7125 sv does not have to be ROK. */
7126
7127 static bool
7128 S_curse(pTHX_ SV * const sv, const bool check_refcnt) {
7129 PERL_ARGS_ASSERT_CURSE;
7130 assert(SvOBJECT(sv));
7131
7132 if (PL_defstash && /* Still have a symbol table? */
7133 SvDESTROYABLE(sv))
7134 {
7135 dSP;
7136 HV* stash;
7137 do {
7138 stash = SvSTASH(sv);
7139 assert(SvTYPE(stash) == SVt_PVHV);
7140 if (HvNAME(stash)) {
7141 CV* destructor = NULL;
7142 struct mro_meta *meta;
7143
7144 assert (HvHasAUX(stash));
7145
7146 DEBUG_o( Perl_deb(aTHX_ "Looking for DESTROY method for %s\n",
7147 HvNAME(stash)) );
7148
7149 /* don't make this an initialization above the assert, since it needs
7150 an AUX structure */
7151 meta = HvMROMETA(stash);
7152 if (meta->destroy_gen && meta->destroy_gen == PL_sub_generation) {
7153 destructor = meta->destroy;
7154 DEBUG_o( Perl_deb(aTHX_ "Using cached DESTROY method %p for %s\n",
7155 (void *)destructor, HvNAME(stash)) );
7156 }
7157 else {
7158 bool autoload = FALSE;
7159 GV *gv =
7160 gv_fetchmeth_pvn(stash, S_destroy, S_destroy_len, -1, 0);
7161 if (gv)
7162 destructor = GvCV(gv);
7163 if (!destructor) {
7164 gv = gv_autoload_pvn(stash, S_destroy, S_destroy_len,
7165 GV_AUTOLOAD_ISMETHOD);
7166 if (gv)
7167 destructor = GvCV(gv);
7168 if (destructor)
7169 autoload = TRUE;
7170 }
7171 /* we don't cache AUTOLOAD for DESTROY, since this code
7172 would then need to set $__PACKAGE__::AUTOLOAD, or the
7173 equivalent for XS AUTOLOADs */
7174 if (!autoload) {
7175 meta->destroy_gen = PL_sub_generation;
7176 meta->destroy = destructor;
7177
7178 DEBUG_o( Perl_deb(aTHX_ "Set cached DESTROY method %p for %s\n",
7179 (void *)destructor, HvNAME(stash)) );
7180 }
7181 else {
7182 DEBUG_o( Perl_deb(aTHX_ "Not caching AUTOLOAD for DESTROY method for %s\n",
7183 HvNAME(stash)) );
7184 }
7185 }
7186 assert(!destructor || SvTYPE(destructor) == SVt_PVCV);
7187 if (destructor
7188 /* A constant subroutine can have no side effects, so
7189 don't bother calling it. */
7190 && !CvCONST(destructor)
7191 /* Don't bother calling an empty destructor or one that
7192 returns immediately. */
7193 && (CvISXSUB(destructor)
7194 || (CvSTART(destructor)
7195 && (CvSTART(destructor)->op_next->op_type
7196 != OP_LEAVESUB)
7197 && (CvSTART(destructor)->op_next->op_type
7198 != OP_PUSHMARK
7199 || CvSTART(destructor)->op_next->op_next->op_type
7200 != OP_RETURN
7201 )
7202 ))
7203 )
7204 {
7205 SV* const tmpref = newRV(sv);
7206 SvREADONLY_on(tmpref); /* DESTROY() could be naughty */
7207 ENTER;
7208 PUSHSTACKi(PERLSI_DESTROY);
7209 EXTEND(SP, 2);
7210 PUSHMARK(SP);
7211 PUSHs(tmpref);
7212 PUTBACK;
7213 call_sv(MUTABLE_SV(destructor),
7214 G_DISCARD|G_EVAL|G_KEEPERR|G_VOID);
7215 POPSTACK;
7216 SPAGAIN;
7217 LEAVE;
7218 if(SvREFCNT(tmpref) < 2) {
7219 /* tmpref is not kept alive! */
7220 SvREFCNT(sv)--;
7221 SvRV_set(tmpref, NULL);
7222 SvROK_off(tmpref);
7223 }
7224 SvREFCNT_dec_NN(tmpref);
7225 }
7226 }
7227 } while (SvOBJECT(sv) && SvSTASH(sv) != stash);
7228
7229
7230 if (check_refcnt && SvREFCNT(sv)) {
7231 if (PL_in_clean_objs)
7232 Perl_croak(aTHX_
7233 "DESTROY created new reference to dead object '%" HEKf "'",
7234 HEKfARG(HvNAME_HEK(stash)));
7235 /* DESTROY gave object new lease on life */
7236 return FALSE;
7237 }
7238 }
7239
7240 if (SvOBJECT(sv)) {
7241 HV * const stash = SvSTASH(sv);
7242 /* Curse before freeing the stash, as freeing the stash could cause
7243 a recursive call into S_curse. */
7244 SvOBJECT_off(sv); /* Curse the object. */
7245 SvSTASH_set(sv,0); /* SvREFCNT_dec may try to read this */
7246 SvREFCNT_dec(stash); /* possibly of changed persuasion */
7247 }
7248 return TRUE;
7249 }
7250
7251 /*
7252 =for apidoc sv_newref
7253
7254 Increment an SV's reference count. Use the C<SvREFCNT_inc()> wrapper
7255 instead.
7256
7257 =cut
7258 */
7259
7260 SV *
7261 Perl_sv_newref(pTHX_ SV *const sv)
7262 {
7263 PERL_UNUSED_CONTEXT;
7264 if (sv)
7265 (SvREFCNT(sv))++;
7266 return sv;
7267 }
7268
7269 /*
7270 =for apidoc sv_free
7271
7272 Decrement an SV's reference count, and if it drops to zero, call
7273 C<sv_clear> to invoke destructors and free up any memory used by
7274 the body; finally, deallocating the SV's head itself.
7275 Normally called via a wrapper macro C<SvREFCNT_dec>.
7276
7277 =cut
7278 */
7279
7280 void
7281 Perl_sv_free(pTHX_ SV *const sv)
7282 {
7283 SvREFCNT_dec(sv);
7284 }
7285
7286
7287 /* Private helper function for SvREFCNT_dec().
7288 * Called with rc set to original SvREFCNT(sv), where rc == 0 or 1 */
7289
7290 void
7291 Perl_sv_free2(pTHX_ SV *const sv, const U32 rc)
7292 {
7293
7294 PERL_ARGS_ASSERT_SV_FREE2;
7295
7296 if (LIKELY( rc == 1 )) {
7297 /* normal case */
7298 SvREFCNT(sv) = 0;
7299
7300 if (SvIMMORTAL(sv)) {
7301 /* make sure SvREFCNT(sv)==0 happens very seldom */
7302 SvREFCNT(sv) = SvREFCNT_IMMORTAL;
7303 SvTEMP_off(sv);
7304 return;
7305 }
7306 #ifdef DEBUGGING
7307 if (SvTEMP(sv)) {
7308 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING),
7309 "Attempt to free temp prematurely: SV 0x%" UVxf
7310 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE);
7311 return;
7312 }
7313 #endif
7314 sv_clear(sv);
7315 if (! SvREFCNT(sv)) /* may have have been resurrected */
7316 del_SV(sv);
7317 return;
7318 }
7319
7320 /* handle exceptional cases */
7321
7322 assert(rc == 0);
7323
7324 if (SvFLAGS(sv) & SVf_BREAK)
7325 /* this SV's refcnt has been artificially decremented to
7326 * trigger cleanup */
7327 return;
7328 if (PL_in_clean_all) /* All is fair */
7329 return;
7330 if (SvIMMORTAL(sv)) {
7331 /* make sure SvREFCNT(sv)==0 happens very seldom */
7332 SvREFCNT(sv) = SvREFCNT_IMMORTAL;
7333 return;
7334 }
7335 if (ckWARN_d(WARN_INTERNAL)) {
7336 #ifdef DEBUG_LEAKING_SCALARS_FORK_DUMP
7337 Perl_dump_sv_child(aTHX_ sv);
7338 #else
7339 #ifdef DEBUG_LEAKING_SCALARS
7340 sv_dump(sv);
7341 #endif
7342 #ifdef DEBUG_LEAKING_SCALARS_ABORT
7343 if (PL_warnhook == PERL_WARNHOOK_FATAL
7344 || ckDEAD(packWARN(WARN_INTERNAL))) {
7345 /* Don't let Perl_warner cause us to escape our fate: */
7346 abort();
7347 }
7348 #endif
7349 /* This may not return: */
7350 Perl_warner(aTHX_ packWARN(WARN_INTERNAL),
7351 "Attempt to free unreferenced scalar: SV 0x%" UVxf
7352 pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE);
7353 #endif
7354 }
7355 #ifdef DEBUG_LEAKING_SCALARS_ABORT
7356 abort();
7357 #endif
7358
7359 }
7360
7361
7362 /*
7363 =for apidoc sv_len
7364
7365 Returns the length of the string in the SV. Handles magic and type
7366 coercion and sets the UTF8 flag appropriately. See also C<L</SvCUR>>, which
7367 gives raw access to the C<xpv_cur> slot.
7368
7369 =cut
7370 */
7371
7372 STRLEN
7373 Perl_sv_len(pTHX_ SV *const sv)
7374 {
7375 STRLEN len;
7376
7377 if (!sv)
7378 return 0;
7379
7380 (void)SvPV_const(sv, len);
7381 return len;
7382 }
7383
7384 /*
7385 =for apidoc sv_len_utf8
7386 =for apidoc_item sv_len_utf8_nomg
7387
7388 These return the number of characters in the string in an SV, counting wide
7389 UTF-8 bytes as a single character. Both handle type coercion.
7390 They differ only in that C<sv_len_utf8> performs 'get' magic;
7391 C<sv_len_utf8_nomg> skips any magic.
7392
7393 =cut
7394 */
7395
7396 /*
7397 * The length is cached in PERL_MAGIC_utf8, in the mg_len field. Also the
7398 * mg_ptr is used, by sv_pos_u2b() and sv_pos_b2u() - see the comments below.
7399 * (Note that the mg_len is not the length of the mg_ptr field.
7400 * This allows the cache to store the character length of the string without
7401 * needing to malloc() extra storage to attach to the mg_ptr.)
7402 *
7403 */
7404
7405 STRLEN
7406 Perl_sv_len_utf8(pTHX_ SV *const sv)
7407 {
7408 if (!sv)
7409 return 0;
7410
7411 SvGETMAGIC(sv);
7412 return sv_len_utf8_nomg(sv);
7413 }
7414
7415 STRLEN
7416 Perl_sv_len_utf8_nomg(pTHX_ SV * const sv)
7417 {
7418 STRLEN len;
7419 const U8 *s = (U8*)SvPV_nomg_const(sv, len);
7420
7421 PERL_ARGS_ASSERT_SV_LEN_UTF8_NOMG;
7422
7423 if (PL_utf8cache && SvUTF8(sv)) {
7424 STRLEN ulen;
7425 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
7426
7427 if (mg && (mg->mg_len != -1 || mg->mg_ptr)) {
7428 if (mg->mg_len != -1)
7429 ulen = mg->mg_len;
7430 else {
7431 /* We can use the offset cache for a headstart.
7432 The longer value is stored in the first pair. */
7433 STRLEN *cache = (STRLEN *) mg->mg_ptr;
7434
7435 ulen = cache[0] + Perl_utf8_length(aTHX_ s + cache[1],
7436 s + len);
7437 }
7438
7439 if (PL_utf8cache < 0) {
7440 const STRLEN real = Perl_utf8_length(aTHX_ s, s + len);
7441 assert_uft8_cache_coherent("sv_len_utf8", ulen, real, sv);
7442 }
7443 }
7444 else {
7445 ulen = Perl_utf8_length(aTHX_ s, s + len);
7446 utf8_mg_len_cache_update(sv, &mg, ulen);
7447 }
7448 return ulen;
7449 }
7450 return SvUTF8(sv) ? Perl_utf8_length(aTHX_ s, s + len) : len;
7451 }
7452
7453 /* Walk forwards to find the byte corresponding to the passed in UTF-8
7454 offset. */
7455 static STRLEN
7456 S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send,
7457 STRLEN *const uoffset_p, bool *const at_end,
7458 bool* canonical_position)
7459 {
7460 const U8 *s = start;
7461 STRLEN uoffset = *uoffset_p;
7462
7463 PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS;
7464
7465 while (s < send && uoffset) {
7466 --uoffset;
7467 s += UTF8SKIP(s);
7468 }
7469 if (s == send) {
7470 *at_end = TRUE;
7471 }
7472 else if (s > send) {
7473 *at_end = TRUE;
7474 /* This is the existing behaviour. Possibly it should be a croak, as
7475 it's actually a bounds error */
7476 s = send;
7477 }
7478 /* If the unicode position is beyond the end, we return the end but
7479 shouldn't cache that position */
7480 *canonical_position = (uoffset == 0);
7481 *uoffset_p -= uoffset;
7482 return s - start;
7483 }
7484
7485 /* Given the length of the string in both bytes and UTF-8 characters, decide
7486 whether to walk forwards or backwards to find the byte corresponding to
7487 the passed in UTF-8 offset. */
7488 static STRLEN
7489 S_sv_pos_u2b_midway(const U8 *const start, const U8 *send,
7490 STRLEN uoffset, const STRLEN uend)
7491 {
7492 STRLEN backw = uend - uoffset;
7493
7494 PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY;
7495
7496 if (uoffset < 2 * backw) {
7497 /* The assumption is that the average size of a character is 2 bytes,
7498 * so going forwards is twice the speed of going backwards (that's
7499 * where the 2 * backw comes from). (The real figure of course depends
7500 * on the UTF-8 data.) */
7501 const U8 *s = start;
7502
7503 s = utf8_hop_forward(s, uoffset, send);
7504 assert (s <= send);
7505 if (s > send)
7506 s = send;
7507 return s - start;
7508 }
7509
7510 send = utf8_hop_back(send, -backw, start);
7511 return send - start;
7512 }
7513
7514 /* For the string representation of the given scalar, find the byte
7515 corresponding to the passed in UTF-8 offset. uoffset0 and boffset0
7516 give another position in the string, *before* the sought offset, which
7517 (which is always true, as 0, 0 is a valid pair of positions), which should
7518 help reduce the amount of linear searching.
7519 If *mgp is non-NULL, it should point to the UTF-8 cache magic, which
7520 will be used to reduce the amount of linear searching. The cache will be
7521 created if necessary, and the found value offered to it for update. */
7522 static STRLEN
7523 S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start,
7524 const U8 *const send, STRLEN uoffset,
7525 STRLEN uoffset0, STRLEN boffset0)
7526 {
7527 STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. */
7528 bool found = FALSE;
7529 bool at_end = FALSE;
7530 bool canonical_position = FALSE;
7531
7532 PERL_ARGS_ASSERT_SV_POS_U2B_CACHED;
7533
7534 assert (uoffset >= uoffset0);
7535
7536 if (!uoffset)
7537 return 0;
7538
7539 if (!SvREADONLY(sv) && !SvGMAGICAL(sv) && SvPOK(sv)
7540 && PL_utf8cache
7541 && (*mgp || (SvTYPE(sv) >= SVt_PVMG &&
7542 (*mgp = mg_find(sv, PERL_MAGIC_utf8))))) {
7543 if ((*mgp)->mg_ptr) {
7544 STRLEN *cache = (STRLEN *) (*mgp)->mg_ptr;
7545 if (cache[0] == uoffset) {
7546 /* An exact match. */
7547 return cache[1];
7548 }
7549 if (cache[2] == uoffset) {
7550 /* An exact match. */
7551 return cache[3];
7552 }
7553
7554 if (cache[0] < uoffset) {
7555 /* The cache already knows part of the way. */
7556 if (cache[0] > uoffset0) {
7557 /* The cache knows more than the passed in pair */
7558 uoffset0 = cache[0];
7559 boffset0 = cache[1];
7560 }
7561 if ((*mgp)->mg_len != -1) {
7562 /* And we know the end too. */
7563 boffset = boffset0
7564 + sv_pos_u2b_midway(start + boffset0, send,
7565 uoffset - uoffset0,
7566 (*mgp)->mg_len - uoffset0);
7567 } else {
7568 uoffset -= uoffset0;
7569 boffset = boffset0
7570 + sv_pos_u2b_forwards(start + boffset0,
7571 send, &uoffset, &at_end,
7572 &canonical_position);
7573 uoffset += uoffset0;
7574 }
7575 }
7576 else if (cache[2] < uoffset) {
7577 /* We're between the two cache entries. */
7578 if (cache[2] > uoffset0) {
7579 /* and the cache knows more than the passed in pair */
7580 uoffset0 = cache[2];
7581 boffset0 = cache[3];
7582 }
7583
7584 boffset = boffset0
7585 + sv_pos_u2b_midway(start + boffset0,
7586 start + cache[1],
7587 uoffset - uoffset0,
7588 cache[0] - uoffset0);
7589 } else {
7590 boffset = boffset0
7591 + sv_pos_u2b_midway(start + boffset0,
7592 start + cache[3],
7593 uoffset - uoffset0,
7594 cache[2] - uoffset0);
7595 }
7596 found = TRUE;
7597 }
7598 else if ((*mgp)->mg_len != -1) {
7599 /* If we can take advantage of a passed in offset, do so. */
7600 /* In fact, offset0 is either 0, or less than offset, so don't
7601 need to worry about the other possibility. */
7602 boffset = boffset0
7603 + sv_pos_u2b_midway(start + boffset0, send,
7604 uoffset - uoffset0,
7605 (*mgp)->mg_len - uoffset0);
7606 found = TRUE;
7607 }
7608 }
7609
7610 if (!found || PL_utf8cache < 0) {
7611 STRLEN real_boffset;
7612 uoffset -= uoffset0;
7613 real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0,
7614 send, &uoffset, &at_end,
7615 &canonical_position);
7616 uoffset += uoffset0;
7617
7618 if (found && PL_utf8cache < 0)
7619 assert_uft8_cache_coherent("sv_pos_u2b_cache", boffset,
7620 real_boffset, sv);
7621 boffset = real_boffset;
7622 }
7623
7624 if (PL_utf8cache && canonical_position && !SvGMAGICAL(sv) && SvPOK(sv)) {
7625 if (at_end)
7626 utf8_mg_len_cache_update(sv, mgp, uoffset);
7627 else
7628 utf8_mg_pos_cache_update(sv, mgp, boffset, uoffset, send - start);
7629 }
7630 return boffset;
7631 }
7632
7633
7634 /*
7635 =for apidoc sv_pos_u2b_flags
7636
7637 Converts the offset from a count of UTF-8 chars from
7638 the start of the string, to a count of the equivalent number of bytes; if
7639 C<lenp> is non-zero, it does the same to C<lenp>, but this time starting from
7640 C<offset>, rather than from the start
7641 of the string. Handles type coercion.
7642 C<flags> is passed to C<SvPV_flags>, and usually should be
7643 C<SV_GMAGIC|SV_CONST_RETURN> to handle magic.
7644
7645 =cut
7646 */
7647
7648 /*
7649 * sv_pos_u2b_flags() uses, like sv_pos_b2u(), the mg_ptr of the potential
7650 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and
7651 * byte offsets. See also the comments of S_utf8_mg_pos_cache_update().
7652 *
7653 */
7654
7655 STRLEN
7656 Perl_sv_pos_u2b_flags(pTHX_ SV *const sv, STRLEN uoffset, STRLEN *const lenp,
7657 U32 flags)
7658 {
7659 const U8 *start;
7660 STRLEN len;
7661 STRLEN boffset;
7662
7663 PERL_ARGS_ASSERT_SV_POS_U2B_FLAGS;
7664
7665 start = (U8*)SvPV_flags(sv, len, flags);
7666 if (len) {
7667 const U8 * const send = start + len;
7668 MAGIC *mg = NULL;
7669 boffset = sv_pos_u2b_cached(sv, &mg, start, send, uoffset, 0, 0);
7670
7671 if (lenp
7672 && *lenp /* don't bother doing work for 0, as its bytes equivalent
7673 is 0, and *lenp is already set to that. */) {
7674 /* Convert the relative offset to absolute. */
7675 const STRLEN uoffset2 = uoffset + *lenp;
7676 const STRLEN boffset2
7677 = sv_pos_u2b_cached(sv, &mg, start, send, uoffset2,
7678 uoffset, boffset) - boffset;
7679
7680 *lenp = boffset2;
7681 }
7682 } else {
7683 if (lenp)
7684 *lenp = 0;
7685 boffset = 0;
7686 }
7687
7688 return boffset;
7689 }
7690
7691 /*
7692 =for apidoc sv_pos_u2b
7693
7694 Converts the value pointed to by C<offsetp> from a count of UTF-8 chars from
7695 the start of the string, to a count of the equivalent number of bytes; if
7696 C<lenp> is non-zero, it does the same to C<lenp>, but this time starting from
7697 the offset, rather than from the start of the string. Handles magic and
7698 type coercion.
7699
7700 Use C<sv_pos_u2b_flags> in preference, which correctly handles strings longer
7701 than 2Gb.
7702
7703 =cut
7704 */
7705
7706 /*
7707 * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential
7708 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and
7709 * byte offsets. See also the comments of S_utf8_mg_pos_cache_update().
7710 *
7711 */
7712
7713 /* This function is subject to size and sign problems */
7714
7715 void
7716 Perl_sv_pos_u2b(pTHX_ SV *const sv, I32 *const offsetp, I32 *const lenp)
7717 {
7718 PERL_ARGS_ASSERT_SV_POS_U2B;
7719
7720 if (lenp) {
7721 STRLEN ulen = (STRLEN)*lenp;
7722 *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, &ulen,
7723 SV_GMAGIC|SV_CONST_RETURN);
7724 *lenp = (I32)ulen;
7725 } else {
7726 *offsetp = (I32)sv_pos_u2b_flags(sv, (STRLEN)*offsetp, NULL,
7727 SV_GMAGIC|SV_CONST_RETURN);
7728 }
7729 }
7730
7731 static void
7732 S_utf8_mg_len_cache_update(pTHX_ SV *const sv, MAGIC **const mgp,
7733 const STRLEN ulen)
7734 {
7735 PERL_ARGS_ASSERT_UTF8_MG_LEN_CACHE_UPDATE;
7736 if (SvREADONLY(sv) || SvGMAGICAL(sv) || !SvPOK(sv))
7737 return;
7738
7739 if (!*mgp && (SvTYPE(sv) < SVt_PVMG ||
7740 !(*mgp = mg_find(sv, PERL_MAGIC_utf8)))) {
7741 *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, &PL_vtbl_utf8, 0, 0);
7742 }
7743 assert(*mgp);
7744
7745 (*mgp)->mg_len = ulen;
7746 }
7747
7748 /* Create and update the UTF8 magic offset cache, with the proffered utf8/
7749 byte length pairing. The (byte) length of the total SV is passed in too,
7750 as blen, because for some (more esoteric) SVs, the call to SvPV_const()
7751 may not have updated SvCUR, so we can't rely on reading it directly.
7752
7753 The proffered utf8/byte length pairing isn't used if the cache already has
7754 two pairs, and swapping either for the proffered pair would increase the
7755 RMS of the intervals between known byte offsets.
7756
7757 The cache itself consists of 4 STRLEN values
7758 0: larger UTF-8 offset
7759 1: corresponding byte offset
7760 2: smaller UTF-8 offset
7761 3: corresponding byte offset
7762
7763 Unused cache pairs have the value 0, 0.
7764 Keeping the cache "backwards" means that the invariant of
7765 cache[0] >= cache[2] is maintained even with empty slots, which means that
7766 the code that uses it doesn't need to worry if only 1 entry has actually
7767 been set to non-zero. It also makes the "position beyond the end of the
7768 cache" logic much simpler, as the first slot is always the one to start
7769 from.
7770 */
7771 static void
7772 S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN byte,
7773 const STRLEN utf8, const STRLEN blen)
7774 {
7775 STRLEN *cache;
7776
7777 PERL_ARGS_ASSERT_UTF8_MG_POS_CACHE_UPDATE;
7778
7779 if (SvREADONLY(sv))
7780 return;
7781
7782 if (!*mgp && (SvTYPE(sv) < SVt_PVMG ||
7783 !(*mgp = mg_find(sv, PERL_MAGIC_utf8)))) {
7784 *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0,
7785 0);
7786 (*mgp)->mg_len = -1;
7787 }
7788 assert(*mgp);
7789
7790 if (!(cache = (STRLEN *)(*mgp)->mg_ptr)) {
7791 Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
7792 (*mgp)->mg_ptr = (char *) cache;
7793 }
7794 assert(cache);
7795
7796 if (PL_utf8cache < 0 && SvPOKp(sv)) {
7797 /* SvPOKp() because, if sv is a reference, then SvPVX() is actually
7798 a pointer. Note that we no longer cache utf8 offsets on refer-
7799 ences, but this check is still a good idea, for robustness. */
7800 const U8 *start = (const U8 *) SvPVX_const(sv);
7801 const STRLEN realutf8 = utf8_length(start, start + byte);
7802
7803 assert_uft8_cache_coherent("utf8_mg_pos_cache_update", utf8, realutf8,
7804 sv);
7805 }
7806
7807 /* Cache is held with the later position first, to simplify the code
7808 that deals with unbounded ends. */
7809
7810 ASSERT_UTF8_CACHE(cache);
7811 if (cache[1] == 0) {
7812 /* Cache is totally empty */
7813 cache[0] = utf8;
7814 cache[1] = byte;
7815 } else if (cache[3] == 0) {
7816 if (byte > cache[1]) {
7817 /* New one is larger, so goes first. */
7818 cache[2] = cache[0];
7819 cache[3] = cache[1];
7820 cache[0] = utf8;
7821 cache[1] = byte;
7822 } else {
7823 cache[2] = utf8;
7824 cache[3] = byte;
7825 }
7826 } else {
7827 /* float casts necessary? XXX */
7828 #define THREEWAY_SQUARE(a,b,c,d) \
7829 ((float)((d) - (c))) * ((float)((d) - (c))) \
7830 + ((float)((c) - (b))) * ((float)((c) - (b))) \
7831 + ((float)((b) - (a))) * ((float)((b) - (a)))
7832
7833 /* Cache has 2 slots in use, and we know three potential pairs.
7834 Keep the two that give the lowest RMS distance. Do the
7835 calculation in bytes simply because we always know the byte
7836 length. squareroot has the same ordering as the positive value,
7837 so don't bother with the actual square root. */
7838 if (byte > cache[1]) {
7839 /* New position is after the existing pair of pairs. */
7840 const float keep_earlier
7841 = THREEWAY_SQUARE(0, cache[3], byte, blen);
7842 const float keep_later
7843 = THREEWAY_SQUARE(0, cache[1], byte, blen);
7844
7845 if (keep_later < keep_earlier) {
7846 cache[2] = cache[0];
7847 cache[3] = cache[1];
7848 }
7849 cache[0] = utf8;
7850 cache[1] = byte;
7851 }
7852 else {
7853 const float keep_later = THREEWAY_SQUARE(0, byte, cache[1], blen);
7854 float b, c, keep_earlier;
7855 if (byte > cache[3]) {
7856 /* New position is between the existing pair of pairs. */
7857 b = (float)cache[3];
7858 c = (float)byte;
7859 } else {
7860 /* New position is before the existing pair of pairs. */
7861 b = (float)byte;
7862 c = (float)cache[3];
7863 }
7864 keep_earlier = THREEWAY_SQUARE(0, b, c, blen);
7865 if (byte > cache[3]) {
7866 if (keep_later < keep_earlier) {
7867 cache[2] = utf8;
7868 cache[3] = byte;
7869 }
7870 else {
7871 cache[0] = utf8;
7872 cache[1] = byte;
7873 }
7874 }
7875 else {
7876 if (! (keep_later < keep_earlier)) {
7877 cache[0] = cache[2];
7878 cache[1] = cache[3];
7879 }
7880 cache[2] = utf8;
7881 cache[3] = byte;
7882 }
7883 }
7884 }
7885 ASSERT_UTF8_CACHE(cache);
7886 }
7887
7888 /* We already know all of the way, now we may be able to walk back. The same
7889 assumption is made as in S_sv_pos_u2b_midway(), namely that walking
7890 backward is half the speed of walking forward. */
7891 static STRLEN
7892 S_sv_pos_b2u_midway(pTHX_ const U8 *const s, const U8 *const target,
7893 const U8 *end, STRLEN endu)
7894 {
7895 const STRLEN forw = target - s;
7896 STRLEN backw = end - target;
7897
7898 PERL_ARGS_ASSERT_SV_POS_B2U_MIDWAY;
7899
7900 if (forw < 2 * backw) {
7901 return utf8_length(s, target);
7902 }
7903
7904 while (end > target) {
7905 end = utf8_hop_back(end, -1, target);
7906 endu--;
7907 }
7908 return endu;
7909 }
7910
7911 /*
7912 =for apidoc sv_pos_b2u_flags
7913
7914 Converts C<offset> from a count of bytes from the start of the string, to
7915 a count of the equivalent number of UTF-8 chars. Handles type coercion.
7916 C<flags> is passed to C<SvPV_flags>, and usually should be
7917 C<SV_GMAGIC|SV_CONST_RETURN> to handle magic.
7918
7919 =cut
7920 */
7921
7922 /*
7923 * sv_pos_b2u_flags() uses, like sv_pos_u2b_flags(), the mg_ptr of the
7924 * potential PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8
7925 * and byte offsets.
7926 *
7927 */
7928 STRLEN
7929 Perl_sv_pos_b2u_flags(pTHX_ SV *const sv, STRLEN const offset, U32 flags)
7930 {
7931 const U8* s;
7932 STRLEN len = 0; /* Actually always set, but let's keep gcc happy. */
7933 STRLEN blen;
7934 MAGIC* mg = NULL;
7935 const U8* send;
7936 bool found = FALSE;
7937
7938 PERL_ARGS_ASSERT_SV_POS_B2U_FLAGS;
7939
7940 s = (const U8*)SvPV_flags(sv, blen, flags);
7941
7942 if (blen < offset)
7943 Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset, blen=%" UVuf
7944 ", byte=%" UVuf, (UV)blen, (UV)offset);
7945
7946 send = s + offset;
7947
7948 if (!SvREADONLY(sv)
7949 && PL_utf8cache
7950 && SvTYPE(sv) >= SVt_PVMG
7951 && (mg = mg_find(sv, PERL_MAGIC_utf8)))
7952 {
7953 if (mg->mg_ptr) {
7954 STRLEN * const cache = (STRLEN *) mg->mg_ptr;
7955 if (cache[1] == offset) {
7956 /* An exact match. */
7957 return cache[0];
7958 }
7959 if (cache[3] == offset) {
7960 /* An exact match. */
7961 return cache[2];
7962 }
7963
7964 if (cache[1] < offset) {
7965 /* We already know part of the way. */
7966 if (mg->mg_len != -1) {
7967 /* Actually, we know the end too. */
7968 len = cache[0]
7969 + S_sv_pos_b2u_midway(aTHX_ s + cache[1], send,
7970 s + blen, mg->mg_len - cache[0]);
7971 } else {
7972 len = cache[0] + utf8_length(s + cache[1], send);
7973 }
7974 }
7975 else if (cache[3] < offset) {
7976 /* We're between the two cached pairs, so we do the calculation
7977 offset by the byte/utf-8 positions for the earlier pair,
7978 then add the utf-8 characters from the string start to
7979 there. */
7980 len = S_sv_pos_b2u_midway(aTHX_ s + cache[3], send,
7981 s + cache[1], cache[0] - cache[2])
7982 + cache[2];
7983
7984 }
7985 else { /* cache[3] > offset */
7986 len = S_sv_pos_b2u_midway(aTHX_ s, send, s + cache[3],
7987 cache[2]);
7988
7989 }
7990 ASSERT_UTF8_CACHE(cache);
7991 found = TRUE;
7992 } else if (mg->mg_len != -1) {
7993 len = S_sv_pos_b2u_midway(aTHX_ s, send, s + blen, mg->mg_len);
7994 found = TRUE;
7995 }
7996 }
7997 if (!found || PL_utf8cache < 0) {
7998 const STRLEN real_len = utf8_length(s, send);
7999
8000 if (found && PL_utf8cache < 0)
8001 assert_uft8_cache_coherent("sv_pos_b2u", len, real_len, sv);
8002 len = real_len;
8003 }
8004
8005 if (PL_utf8cache) {
8006 if (blen == offset)
8007 utf8_mg_len_cache_update(sv, &mg, len);
8008 else
8009 utf8_mg_pos_cache_update(sv, &mg, offset, len, blen);
8010 }
8011
8012 return len;
8013 }
8014
8015 /*
8016 =for apidoc sv_pos_b2u
8017
8018 Converts the value pointed to by C<offsetp> from a count of bytes from the
8019 start of the string, to a count of the equivalent number of UTF-8 chars.
8020 Handles magic and type coercion.
8021
8022 Use C<sv_pos_b2u_flags> in preference, which correctly handles strings
8023 longer than 2Gb.
8024
8025 =cut
8026 */
8027
8028 /*
8029 * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential
8030 * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and
8031 * byte offsets.
8032 *
8033 */
8034 void
8035 Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
8036 {
8037 PERL_ARGS_ASSERT_SV_POS_B2U;
8038
8039 if (!sv)
8040 return;
8041
8042 *offsetp = (I32)sv_pos_b2u_flags(sv, (STRLEN)*offsetp,
8043 SV_GMAGIC|SV_CONST_RETURN);
8044 }
8045
8046 static void
8047 S_assert_uft8_cache_coherent(pTHX_ const char *const func, STRLEN from_cache,
8048 STRLEN real, SV *const sv)
8049 {
8050 PERL_ARGS_ASSERT_ASSERT_UFT8_CACHE_COHERENT;
8051
8052 /* As this is debugging only code, save space by keeping this test here,
8053 rather than inlining it in all the callers. */
8054 if (from_cache == real)
8055 return;
8056
8057 /* Need to turn the assertions off otherwise we may recurse infinitely
8058 while printing error messages. */
8059 SAVEI8(PL_utf8cache);
8060 PL_utf8cache = 0;
8061 Perl_croak(aTHX_ "panic: %s cache %" UVuf " real %" UVuf " for %" SVf,
8062 func, (UV) from_cache, (UV) real, SVfARG(sv));
8063 }
8064
8065 /*
8066 =for apidoc sv_eq
8067 =for apidoc_item sv_eq_flags
8068
8069 These each return a boolean indicating whether or not the strings in the two
8070 SVs are equal. If S<C<'use bytes'>> is in effect, the comparison is
8071 byte-by-byte; otherwise character-by-character. Each will coerce its args to
8072 strings if necessary.
8073
8074 They differ only in that C<sv_eq> always processes get magic, while
8075 C<sv_eq_flags> processes get magic only when the C<flags> parameter has the
8076 C<SV_GMAGIC> bit set.
8077
8078 These functions do not handle operator overloading. For versions that do,
8079 see instead C<L</sv_streq>> or C<L</sv_streq_flags>>.
8080
8081 =cut
8082 */
8083
8084 I32
8085 Perl_sv_eq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags)
8086 {
8087 const char *pv1;
8088 STRLEN cur1;
8089 const char *pv2;
8090 STRLEN cur2;
8091
8092 if (!sv1) {
8093 pv1 = "";
8094 cur1 = 0;
8095 }
8096 else {
8097 /* if pv1 and pv2 are the same, second SvPV_const call may
8098 * invalidate pv1 (if we are handling magic), so we may need to
8099 * make a copy */
8100 if (sv1 == sv2 && flags & SV_GMAGIC
8101 && (SvTHINKFIRST(sv1) || SvGMAGICAL(sv1))) {
8102 pv1 = SvPV_const(sv1, cur1);
8103 sv1 = newSVpvn_flags(pv1, cur1, SVs_TEMP | SvUTF8(sv2));
8104 }
8105 pv1 = SvPV_flags_const(sv1, cur1, flags);
8106 }
8107
8108 if (!sv2){
8109 pv2 = "";
8110 cur2 = 0;
8111 }
8112 else
8113 pv2 = SvPV_flags_const(sv2, cur2, flags);
8114
8115 if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
8116 /* Differing utf8ness. */
8117 if (SvUTF8(sv1)) {
8118 /* sv1 is the UTF-8 one */
8119 return bytes_cmp_utf8((const U8*)pv2, cur2,
8120 (const U8*)pv1, cur1) == 0;
8121 }
8122 else {
8123 /* sv2 is the UTF-8 one */
8124 return bytes_cmp_utf8((const U8*)pv1, cur1,
8125 (const U8*)pv2, cur2) == 0;
8126 }
8127 }
8128
8129 if (cur1 == cur2)
8130 return (pv1 == pv2) || memEQ(pv1, pv2, cur1);
8131 else
8132 return 0;
8133 }
8134
8135 /*
8136 =for apidoc sv_streq_flags
8137
8138 Returns a boolean indicating whether the strings in the two SVs are
8139 identical. If the flags argument has the C<SV_GMAGIC> bit set, it handles
8140 get-magic too. Will coerce its args to strings if necessary. Treats
8141 C<NULL> as undef. Correctly handles the UTF8 flag.
8142
8143 If flags does not have the C<SV_SKIP_OVERLOAD> bit set, an attempt to use
8144 C<eq> overloading will be made. If such overloading does not exist or the
8145 flag is set, then regular string comparison will be used instead.
8146
8147 =for apidoc sv_streq
8148
8149 A convenient shortcut for calling C<sv_streq_flags> with the C<SV_GMAGIC>
8150 flag. This function basically behaves like the Perl code C<$sv1 eq $sv2>.
8151
8152 =cut
8153 */
8154
8155 bool
8156 Perl_sv_streq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags)
8157 {
8158 PERL_ARGS_ASSERT_SV_STREQ_FLAGS;
8159
8160 if(flags & SV_GMAGIC) {
8161 if(sv1)
8162 SvGETMAGIC(sv1);
8163 if(sv2)
8164 SvGETMAGIC(sv2);
8165 }
8166
8167 /* Treat NULL as undef */
8168 if(!sv1)
8169 sv1 = &PL_sv_undef;
8170 if(!sv2)
8171 sv2 = &PL_sv_undef;
8172
8173 if(!(flags & SV_SKIP_OVERLOAD) &&
8174 (SvAMAGIC(sv1) || SvAMAGIC(sv2))) {
8175 SV *ret = amagic_call(sv1, sv2, seq_amg, 0);
8176 if(ret)
8177 return SvTRUE(ret);
8178 }
8179
8180 return sv_eq_flags(sv1, sv2, 0);
8181 }
8182
8183 /*
8184 =for apidoc sv_numeq_flags
8185
8186 Returns a boolean indicating whether the numbers in the two SVs are
8187 identical. If the flags argument has the C<SV_GMAGIC> bit set, it handles
8188 get-magic too. Will coerce its args to numbers if necessary. Treats
8189 C<NULL> as undef.
8190
8191 If flags does not have the C<SV_SKIP_OVERLOAD> bit set, an attempt to use
8192 C<==> overloading will be made. If such overloading does not exist or the
8193 flag is set, then regular numerical comparison will be used instead.
8194
8195 =for apidoc sv_numeq
8196
8197 A convenient shortcut for calling C<sv_numeq_flags> with the C<SV_GMAGIC>
8198 flag. This function basically behaves like the Perl code C<$sv1 == $sv2>.
8199
8200 =cut
8201 */
8202
8203 bool
8204 Perl_sv_numeq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags)
8205 {
8206 PERL_ARGS_ASSERT_SV_NUMEQ_FLAGS;
8207
8208 if(flags & SV_GMAGIC) {
8209 if(sv1)
8210 SvGETMAGIC(sv1);
8211 if(sv2)
8212 SvGETMAGIC(sv2);
8213 }
8214
8215 /* Treat NULL as undef */
8216 if(!sv1)
8217 sv1 = &PL_sv_undef;
8218 if(!sv2)
8219 sv2 = &PL_sv_undef;
8220
8221 if(!(flags & SV_SKIP_OVERLOAD) &&
8222 (SvAMAGIC(sv1) || SvAMAGIC(sv2))) {
8223 SV *ret = amagic_call(sv1, sv2, eq_amg, 0);
8224 if(ret)
8225 return SvTRUE(ret);
8226 }
8227
8228 return do_ncmp(sv1, sv2) == 0;
8229 }
8230
8231 /*
8232 =for apidoc sv_cmp
8233
8234 Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the
8235 string in C<sv1> is less than, equal to, or greater than the string in
8236 C<sv2>. Is UTF-8 and S<C<'use bytes'>> aware, handles get magic, and will
8237 coerce its args to strings if necessary. See also C<L</sv_cmp_locale>>.
8238
8239 =for apidoc sv_cmp_flags
8240
8241 Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the
8242 string in C<sv1> is less than, equal to, or greater than the string in
8243 C<sv2>. Is UTF-8 and S<C<'use bytes'>> aware and will coerce its args to strings
8244 if necessary. If the flags has the C<SV_GMAGIC> bit set, it handles get magic. See
8245 also C<L</sv_cmp_locale_flags>>.
8246
8247 =cut
8248 */
8249
8250 I32
8251 Perl_sv_cmp(pTHX_ SV *const sv1, SV *const sv2)
8252 {
8253 return sv_cmp_flags(sv1, sv2, SV_GMAGIC);
8254 }
8255
8256 I32
8257 Perl_sv_cmp_flags(pTHX_ SV *const sv1, SV *const sv2,
8258 const U32 flags)
8259 {
8260 STRLEN cur1, cur2;
8261 const char *pv1, *pv2;
8262 I32 cmp;
8263 SV *svrecode = NULL;
8264
8265 if (!sv1) {
8266 pv1 = "";
8267 cur1 = 0;
8268 }
8269 else
8270 pv1 = SvPV_flags_const(sv1, cur1, flags);
8271
8272 if (!sv2) {
8273 pv2 = "";
8274 cur2 = 0;
8275 }
8276 else
8277 pv2 = SvPV_flags_const(sv2, cur2, flags);
8278
8279 if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
8280 /* Differing utf8ness. */
8281 if (SvUTF8(sv1)) {
8282 const int retval = -bytes_cmp_utf8((const U8*)pv2, cur2,
8283 (const U8*)pv1, cur1);
8284 return retval ? retval < 0 ? -1 : +1 : 0;
8285 }
8286 else {
8287 const int retval = bytes_cmp_utf8((const U8*)pv1, cur1,
8288 (const U8*)pv2, cur2);
8289 return retval ? retval < 0 ? -1 : +1 : 0;
8290 }
8291 }
8292
8293 /* Here, if both are non-NULL, then they have the same UTF8ness. */
8294
8295 if (!cur1) {
8296 cmp = cur2 ? -1 : 0;
8297 } else if (!cur2) {
8298 cmp = 1;
8299 } else {
8300 STRLEN shortest_len = cur1 < cur2 ? cur1 : cur2;
8301
8302 #ifdef EBCDIC
8303 if (! DO_UTF8(sv1)) {
8304 #endif
8305 const I32 retval = memcmp((const void*)pv1,
8306 (const void*)pv2,
8307 shortest_len);
8308 if (retval) {
8309 cmp = retval < 0 ? -1 : 1;
8310 } else if (cur1 == cur2) {
8311 cmp = 0;
8312 } else {
8313 cmp = cur1 < cur2 ? -1 : 1;
8314 }
8315 #ifdef EBCDIC
8316 }
8317 else { /* Both are to be treated as UTF-EBCDIC */
8318
8319 /* EBCDIC UTF-8 is complicated by the fact that it is based on I8
8320 * which remaps code points 0-255. We therefore generally have to
8321 * unmap back to the original values to get an accurate comparison.
8322 * But we don't have to do that for UTF-8 invariants, as by
8323 * definition, they aren't remapped, nor do we have to do it for
8324 * above-latin1 code points, as they also aren't remapped. (This
8325 * code also works on ASCII platforms, but the memcmp() above is
8326 * much faster). */
8327
8328 const char *e = pv1 + shortest_len;
8329
8330 /* Find the first bytes that differ between the two strings */
8331 while (pv1 < e && *pv1 == *pv2) {
8332 pv1++;
8333 pv2++;
8334 }
8335
8336
8337 if (pv1 == e) { /* Are the same all the way to the end */
8338 if (cur1 == cur2) {
8339 cmp = 0;
8340 } else {
8341 cmp = cur1 < cur2 ? -1 : 1;
8342 }
8343 }
8344 else /* Here *pv1 and *pv2 are not equal, but all bytes earlier
8345 * in the strings were. The current bytes may or may not be
8346 * at the beginning of a character. But neither or both are
8347 * (or else earlier bytes would have been different). And
8348 * if we are in the middle of a character, the two
8349 * characters have the same number of bytes
8350 * (because in this case the start bytes are the same, and
8351 * the start bytes encode the character's length). */
8352 if (UTF8_IS_INVARIANT(*pv1))
8353 {
8354 /* If both are invariants; can just compare directly */
8355 if (UTF8_IS_INVARIANT(*pv2)) {
8356 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1;
8357 }
8358 else /* Since *pv1 is invariant, it is the whole character,
8359 which means it is at the beginning of a character.
8360 That means pv2 is also at the beginning of a
8361 character (see earlier comment). Since it isn't
8362 invariant, it must be a start byte. If it starts a
8363 character whose code point is above 255, that
8364 character is greater than any single-byte char, which
8365 *pv1 is */
8366 if (UTF8_IS_ABOVE_LATIN1_START(*pv2))
8367 {
8368 cmp = -1;
8369 }
8370 else {
8371 /* Here, pv2 points to a character composed of 2 bytes
8372 * whose code point is < 256. Get its code point and
8373 * compare with *pv1 */
8374 cmp = ((U8) *pv1 < EIGHT_BIT_UTF8_TO_NATIVE(*pv2, *(pv2 + 1)))
8375 ? -1
8376 : 1;
8377 }
8378 }
8379 else /* The code point starting at pv1 isn't a single byte */
8380 if (UTF8_IS_INVARIANT(*pv2))
8381 {
8382 /* But here, the code point starting at *pv2 is a single byte,
8383 * and so *pv1 must begin a character, hence is a start byte.
8384 * If that character is above 255, it is larger than any
8385 * single-byte char, which *pv2 is */
8386 if (UTF8_IS_ABOVE_LATIN1_START(*pv1)) {
8387 cmp = 1;
8388 }
8389 else {
8390 /* Here, pv1 points to a character composed of 2 bytes
8391 * whose code point is < 256. Get its code point and
8392 * compare with the single byte character *pv2 */
8393 cmp = (EIGHT_BIT_UTF8_TO_NATIVE(*pv1, *(pv1 + 1)) < (U8) *pv2)
8394 ? -1
8395 : 1;
8396 }
8397 }
8398 else /* Here, we've ruled out either *pv1 and *pv2 being
8399 invariant. That means both are part of variants, but not
8400 necessarily at the start of a character */
8401 if ( UTF8_IS_ABOVE_LATIN1_START(*pv1)
8402 || UTF8_IS_ABOVE_LATIN1_START(*pv2))
8403 {
8404 /* Here, at least one is the start of a character, which means
8405 * the other is also a start byte. And the code point of at
8406 * least one of the characters is above 255. It is a
8407 * characteristic of UTF-EBCDIC that all start bytes for
8408 * above-latin1 code points are well behaved as far as code
8409 * point comparisons go, and all are larger than all other
8410 * start bytes, so the comparison with those is also well
8411 * behaved */
8412 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1;
8413 }
8414 else {
8415 /* Here both *pv1 and *pv2 are part of variant characters.
8416 * They could be both continuations, or both start characters.
8417 * (One or both could even be an illegal start character (for
8418 * an overlong) which for the purposes of sorting we treat as
8419 * legal. */
8420 if (UTF8_IS_CONTINUATION(*pv1)) {
8421
8422 /* If they are continuations for code points above 255,
8423 * then comparing the current byte is sufficient, as there
8424 * is no remapping of these and so the comparison is
8425 * well-behaved. We determine if they are such
8426 * continuations by looking at the preceding byte. It
8427 * could be a start byte, from which we can tell if it is
8428 * for an above 255 code point. Or it could be a
8429 * continuation, which means the character occupies at
8430 * least 3 bytes, so must be above 255. */
8431 if ( UTF8_IS_CONTINUATION(*(pv2 - 1))
8432 || UTF8_IS_ABOVE_LATIN1_START(*(pv2 -1)))
8433 {
8434 cmp = ((U8) *pv1 < (U8) *pv2) ? -1 : 1;
8435 goto cmp_done;
8436 }
8437
8438 /* Here, the continuations are for code points below 256;
8439 * back up one to get to the start byte */
8440 pv1--;
8441 pv2--;
8442 }
8443
8444 /* We need to get the actual native code point of each of these
8445 * variants in order to compare them */
8446 cmp = ( EIGHT_BIT_UTF8_TO_NATIVE(*pv1, *(pv1 + 1))
8447 < EIGHT_BIT_UTF8_TO_NATIVE(*pv2, *(pv2 + 1)))
8448 ? -1
8449 : 1;
8450 }
8451 }
8452 cmp_done: ;
8453 #endif
8454 }
8455
8456 SvREFCNT_dec(svrecode);
8457
8458 return cmp;
8459 }
8460
8461 /*
8462 =for apidoc sv_cmp_locale
8463
8464 Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and
8465 S<C<'use bytes'>> aware, handles get magic, and will coerce its args to strings
8466 if necessary. See also C<L</sv_cmp>>.
8467
8468 =for apidoc sv_cmp_locale_flags
8469
8470 Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and
8471 S<C<'use bytes'>> aware and will coerce its args to strings if necessary. If
8472 the flags contain C<SV_GMAGIC>, it handles get magic. See also
8473 C<L</sv_cmp_flags>>.
8474
8475 =cut
8476 */
8477
8478 I32
8479 Perl_sv_cmp_locale(pTHX_ SV *const sv1, SV *const sv2)
8480 {
8481 return sv_cmp_locale_flags(sv1, sv2, SV_GMAGIC);
8482 }
8483
8484 I32
8485 Perl_sv_cmp_locale_flags(pTHX_ SV *const sv1, SV *const sv2,
8486 const U32 flags)
8487 {
8488 #ifdef USE_LOCALE_COLLATE
8489
8490 char *pv1, *pv2;
8491 STRLEN len1, len2;
8492 I32 retval;
8493
8494 if (PL_collation_standard)
8495 goto raw_compare;
8496
8497 len1 = len2 = 0;
8498
8499 /* Revert to using raw compare if both operands exist, but either one
8500 * doesn't transform properly for collation */
8501 if (sv1 && sv2) {
8502 pv1 = sv_collxfrm_flags(sv1, &len1, flags);
8503 if (! pv1) {
8504 goto raw_compare;
8505 }
8506 pv2 = sv_collxfrm_flags(sv2, &len2, flags);
8507 if (! pv2) {
8508 goto raw_compare;
8509 }
8510 }
8511 else {
8512 pv1 = sv1 ? sv_collxfrm_flags(sv1, &len1, flags) : (char *) NULL;
8513 pv2 = sv2 ? sv_collxfrm_flags(sv2, &len2, flags) : (char *) NULL;
8514 }
8515
8516 if (!pv1 || !len1) {
8517 if (pv2 && len2)
8518 return -1;
8519 else
8520 goto raw_compare;
8521 }
8522 else {
8523 if (!pv2 || !len2)
8524 return 1;
8525 }
8526
8527 retval = memcmp((void*)pv1, (void*)pv2, len1 < len2 ? len1 : len2);
8528
8529 if (retval)
8530 return retval < 0 ? -1 : 1;
8531
8532 /*
8533 * When the result of collation is equality, that doesn't mean
8534 * that there are no differences -- some locales exclude some
8535 * characters from consideration. So to avoid false equalities,
8536 * we use the raw string as a tiebreaker.
8537 */
8538
8539 raw_compare:
8540 /* FALLTHROUGH */
8541
8542 #else
8543 PERL_UNUSED_ARG(flags);
8544 #endif /* USE_LOCALE_COLLATE */
8545
8546 return sv_cmp(sv1, sv2);
8547 }
8548
8549
8550 #ifdef USE_LOCALE_COLLATE
8551
8552 /*
8553 =for apidoc sv_collxfrm
8554
8555 This calls C<sv_collxfrm_flags> with the SV_GMAGIC flag. See
8556 C<L</sv_collxfrm_flags>>.
8557
8558 =for apidoc sv_collxfrm_flags
8559
8560 Add Collate Transform magic to an SV if it doesn't already have it. If the
8561 flags contain C<SV_GMAGIC>, it handles get-magic.
8562
8563 Any scalar variable may carry C<PERL_MAGIC_collxfrm> magic that contains the
8564 scalar data of the variable, but transformed to such a format that a normal
8565 memory comparison can be used to compare the data according to the locale
8566 settings.
8567
8568 =cut
8569 */
8570
8571 char *
8572 Perl_sv_collxfrm_flags(pTHX_ SV *const sv, STRLEN *const nxp, const I32 flags)
8573 {
8574 MAGIC *mg;
8575
8576 PERL_ARGS_ASSERT_SV_COLLXFRM_FLAGS;
8577
8578 mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_collxfrm) : (MAGIC *) NULL;
8579
8580 /* If we don't have collation magic on 'sv', or the locale has changed
8581 * since the last time we calculated it, get it and save it now */
8582 if (!mg || !mg->mg_ptr || *(U32*)mg->mg_ptr != PL_collation_ix) {
8583 const char *s;
8584 char *xf;
8585 STRLEN len, xlen;
8586
8587 /* Free the old space */
8588 if (mg)
8589 Safefree(mg->mg_ptr);
8590
8591 s = SvPV_flags_const(sv, len, flags);
8592 if ((xf = mem_collxfrm_(s, len, &xlen, cBOOL(SvUTF8(sv))))) {
8593 if (! mg) {
8594 mg = sv_magicext(sv, 0, PERL_MAGIC_collxfrm, &PL_vtbl_collxfrm,
8595 0, 0);
8596 assert(mg);
8597 }
8598 mg->mg_ptr = xf;
8599 mg->mg_len = xlen;
8600 }
8601 else {
8602 if (mg) {
8603 mg->mg_ptr = NULL;
8604 mg->mg_len = -1;
8605 }
8606 }
8607 }
8608
8609 if (mg && mg->mg_ptr) {
8610 *nxp = mg->mg_len;
8611 return mg->mg_ptr + sizeof(PL_collation_ix);
8612 }
8613 else {
8614 *nxp = 0;
8615 return NULL;
8616 }
8617 }
8618
8619 #endif /* USE_LOCALE_COLLATE */
8620
8621 static char *
8622 S_sv_gets_append_to_utf8(pTHX_ SV *const sv, PerlIO *const fp, I32 append)
8623 {
8624 SV * const tsv = newSV_type(SVt_NULL);
8625 ENTER;
8626 SAVEFREESV(tsv);
8627 sv_gets(tsv, fp, 0);
8628 sv_utf8_upgrade_nomg(tsv);
8629 SvCUR_set(sv,append);
8630 sv_catsv(sv,tsv);
8631 LEAVE;
8632 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL;
8633 }
8634
8635 static char *
8636 S_sv_gets_read_record(pTHX_ SV *const sv, PerlIO *const fp, I32 append)
8637 {
8638 SSize_t bytesread;
8639 const STRLEN recsize = SvUV(SvRV(PL_rs)); /* RsRECORD() guarantees > 0. */
8640 /* Grab the size of the record we're getting */
8641 char *buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append;
8642
8643 /* Go yank in */
8644 #ifdef __VMS
8645 int fd;
8646 Stat_t st;
8647
8648 /* With a true, record-oriented file on VMS, we need to use read directly
8649 * to ensure that we respect RMS record boundaries. The user is responsible
8650 * for providing a PL_rs value that corresponds to the FAB$W_MRS (maximum
8651 * record size) field. N.B. This is likely to produce invalid results on
8652 * varying-width character data when a record ends mid-character.
8653 */
8654 fd = PerlIO_fileno(fp);
8655 if (fd != -1
8656 && PerlLIO_fstat(fd, &st) == 0
8657 && (st.st_fab_rfm == FAB$C_VAR
8658 || st.st_fab_rfm == FAB$C_VFC
8659 || st.st_fab_rfm == FAB$C_FIX)) {
8660
8661 bytesread = PerlLIO_read(fd, buffer, recsize);
8662 }
8663 else /* in-memory file from PerlIO::Scalar
8664 * or not a record-oriented file
8665 */
8666 #endif
8667 {
8668 bytesread = PerlIO_read(fp, buffer, recsize);
8669
8670 /* At this point, the logic in sv_get() means that sv will
8671 be treated as utf-8 if the handle is utf8.
8672 */
8673 if (PerlIO_isutf8(fp) && bytesread > 0) {
8674 char *bend = buffer + bytesread;
8675 char *bufp = buffer;
8676 size_t charcount = 0;
8677 bool charstart = TRUE;
8678 STRLEN skip = 0;
8679
8680 while (charcount < recsize) {
8681 /* count accumulated characters */
8682 while (bufp < bend) {
8683 if (charstart) {
8684 skip = UTF8SKIP(bufp);
8685 }
8686 if (bufp + skip > bend) {
8687 /* partial at the end */
8688 charstart = FALSE;
8689 break;
8690 }
8691 else {
8692 ++charcount;
8693 bufp += skip;
8694 charstart = TRUE;
8695 }
8696 }
8697
8698 if (charcount < recsize) {
8699 STRLEN readsize;
8700 STRLEN bufp_offset = bufp - buffer;
8701 SSize_t morebytesread;
8702
8703 /* originally I read enough to fill any incomplete
8704 character and the first byte of the next
8705 character if needed, but if there's many
8706 multi-byte encoded characters we're going to be
8707 making a read call for every character beyond
8708 the original read size.
8709
8710 So instead, read the rest of the character if
8711 any, and enough bytes to match at least the
8712 start bytes for each character we're going to
8713 read.
8714 */
8715 if (charstart)
8716 readsize = recsize - charcount;
8717 else
8718 readsize = skip - (bend - bufp) + recsize - charcount - 1;
8719 buffer = SvGROW(sv, append + bytesread + readsize + 1) + append;
8720 bend = buffer + bytesread;
8721 morebytesread = PerlIO_read(fp, bend, readsize);
8722 if (morebytesread <= 0) {
8723 /* we're done, if we still have incomplete
8724 characters the check code in sv_gets() will
8725 warn about them.
8726
8727 I'd originally considered doing
8728 PerlIO_ungetc() on all but the lead
8729 character of the incomplete character, but
8730 read() doesn't do that, so I don't.
8731 */
8732 break;
8733 }
8734
8735 /* prepare to scan some more */
8736 bytesread += morebytesread;
8737 bend = buffer + bytesread;
8738 bufp = buffer + bufp_offset;
8739 }
8740 }
8741 }
8742 }
8743
8744 if (bytesread < 0)
8745 bytesread = 0;
8746 SvCUR_set(sv, bytesread + append);
8747 buffer[bytesread] = '\0';
8748 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL;
8749 }
8750
8751 /*
8752 =for apidoc sv_gets
8753
8754 Get a line from the filehandle and store it into the SV, optionally
8755 appending to the currently-stored string. If C<append> is not 0, the
8756 line is appended to the SV instead of overwriting it. C<append> should
8757 be set to the byte offset that the appended string should start at
8758 in the SV (typically, C<SvCUR(sv)> is a suitable choice).
8759
8760 =cut
8761 */
8762
8763 char *
8764 Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append)
8765 {
8766 const char *rsptr;
8767 STRLEN rslen;
8768 STDCHAR rslast;
8769 STDCHAR *bp;
8770 SSize_t cnt;
8771 int i = 0;
8772 int rspara = 0;
8773
8774 PERL_ARGS_ASSERT_SV_GETS;
8775
8776 if (SvTHINKFIRST(sv))
8777 sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV);
8778 /* XXX. If you make this PVIV, then copy on write can copy scalars read
8779 from <>.
8780 However, perlbench says it's slower, because the existing swipe code
8781 is faster than copy on write.
8782 Swings and roundabouts. */
8783 SvUPGRADE(sv, SVt_PV);
8784
8785 if (append) {
8786 /* line is going to be appended to the existing buffer in the sv */
8787 if (PerlIO_isutf8(fp)) {
8788 if (!SvUTF8(sv)) {
8789 sv_utf8_upgrade_nomg(sv);
8790 sv_pos_u2b(sv,&append,0);
8791 }
8792 } else if (SvUTF8(sv)) {
8793 return S_sv_gets_append_to_utf8(aTHX_ sv, fp, append);
8794 }
8795 }
8796
8797 SvPOK_only(sv);
8798 if (!append) {
8799 /* not appending - "clear" the string by setting SvCUR to 0,
8800 * the pv is still available. */
8801 SvCUR_set(sv,0);
8802 }
8803 if (PerlIO_isutf8(fp))
8804 SvUTF8_on(sv);
8805
8806 if (IN_PERL_COMPILETIME) {
8807 /* we always read code in line mode */
8808 rsptr = "\n";
8809 rslen = 1;
8810 }
8811 else if (RsSNARF(PL_rs)) {
8812 /* If it is a regular disk file use size from stat() as estimate
8813 of amount we are going to read -- may result in mallocing
8814 more memory than we really need if the layers below reduce
8815 the size we read (e.g. CRLF or a gzip layer).
8816 */
8817 Stat_t st;
8818 int fd = PerlIO_fileno(fp);
8819 if (fd >= 0 && (PerlLIO_fstat(fd, &st) == 0) && S_ISREG(st.st_mode)) {
8820 const Off_t offset = PerlIO_tell(fp);
8821 if (offset != (Off_t) -1 && st.st_size + append > offset) {
8822 #ifdef PERL_COPY_ON_WRITE
8823 /* Add an extra byte for the sake of copy-on-write's
8824 * buffer reference count. */
8825 (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 2));
8826 #else
8827 (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 1));
8828 #endif
8829 }
8830 }
8831 rsptr = NULL;
8832 rslen = 0;
8833 }
8834 else if (RsRECORD(PL_rs)) {
8835 return S_sv_gets_read_record(aTHX_ sv, fp, append);
8836 }
8837 else if (RsPARA(PL_rs)) {
8838 rsptr = "\n\n";
8839 rslen = 2;
8840 rspara = 1;
8841 }
8842 else {
8843 /* Get $/ i.e. PL_rs into same encoding as stream wants */
8844 if (PerlIO_isutf8(fp)) {
8845 rsptr = SvPVutf8(PL_rs, rslen);
8846 }
8847 else {
8848 if (SvUTF8(PL_rs)) {
8849 if (!sv_utf8_downgrade(PL_rs, TRUE)) {
8850 Perl_croak(aTHX_ "Wide character in $/");
8851 }
8852 }
8853 /* extract the raw pointer to the record separator */
8854 rsptr = SvPV_const(PL_rs, rslen);
8855 }
8856 }
8857
8858 /* rslast is the last character in the record separator
8859 * note we don't use rslast except when rslen is true, so the
8860 * null assign is a placeholder. */
8861 rslast = rslen ? rsptr[rslen - 1] : '\0';
8862
8863 if (rspara) { /* have to do this both before and after */
8864 /* to make sure file boundaries work right */
8865 while (1) {
8866 if (PerlIO_eof(fp))
8867 return 0;
8868 i = PerlIO_getc(fp);
8869 if (i != '\n') {
8870 if (i == -1)
8871 return 0;
8872 PerlIO_ungetc(fp,i);
8873 break;
8874 }
8875 }
8876 }
8877
8878 /* See if we know enough about I/O mechanism to cheat it ! */
8879
8880 /* This used to be #ifdef test - it is made run-time test for ease
8881 of abstracting out stdio interface. One call should be cheap
8882 enough here - and may even be a macro allowing compile
8883 time optimization.
8884 */
8885
8886 if (PerlIO_fast_gets(fp)) {
8887 /*
8888 * We can do buffer based IO operations on this filehandle.
8889 *
8890 * This means we can bypass a lot of subcalls and process
8891 * the buffer directly, it also means we know the upper bound
8892 * on the amount of data we might read of the current buffer
8893 * into our sv. Knowing this allows us to preallocate the pv
8894 * to be able to hold that maximum, which allows us to simplify
8895 * a lot of logic. */
8896
8897 /*
8898 * We're going to steal some values from the stdio struct
8899 * and put EVERYTHING in the innermost loop into registers.
8900 */
8901 STDCHAR *ptr; /* pointer into fp's read-ahead buffer */
8902 STRLEN bpx; /* length of the data in the target sv
8903 used to fix pointers after a SvGROW */
8904 I32 shortbuffered; /* If the pv buffer is shorter than the amount
8905 of data left in the read-ahead buffer.
8906 If 0 then the pv buffer can hold the full
8907 amount left, otherwise this is the amount it
8908 can hold. */
8909
8910 /* Here is some breathtakingly efficient cheating */
8911
8912 /* When you read the following logic resist the urge to think
8913 * of record separators that are 1 byte long. They are an
8914 * uninteresting special (simple) case.
8915 *
8916 * Instead think of record separators which are at least 2 bytes
8917 * long, and keep in mind that we need to deal with such
8918 * separators when they cross a read-ahead buffer boundary.
8919 *
8920 * Also consider that we need to gracefully deal with separators
8921 * that may be longer than a single read ahead buffer.
8922 *
8923 * Lastly do not forget we want to copy the delimiter as well. We
8924 * are copying all data in the file _up_to_and_including_ the separator
8925 * itself.
8926 *
8927 * Now that you have all that in mind here is what is happening below:
8928 *
8929 * 1. When we first enter the loop we do some memory book keeping to see
8930 * how much free space there is in the target SV. (This sub assumes that
8931 * it is operating on the same SV most of the time via $_ and that it is
8932 * going to be able to reuse the same pv buffer each call.) If there is
8933 * "enough" room then we set "shortbuffered" to how much space there is
8934 * and start reading forward.
8935 *
8936 * 2. When we scan forward we copy from the read-ahead buffer to the target
8937 * SV's pv buffer. While we go we watch for the end of the read-ahead buffer,
8938 * and the end of the of pv, as well as for the "rslast", which is the last
8939 * char of the separator.
8940 *
8941 * 3. When scanning forward if we see rslast then we jump backwards in *pv*
8942 * (which has a "complete" record up to the point we saw rslast) and check
8943 * it to see if it matches the separator. If it does we are done. If it doesn't
8944 * we continue on with the scan/copy.
8945 *
8946 * 4. If we run out of read-ahead buffer (cnt goes to 0) then we have to get
8947 * the IO system to read the next buffer. We do this by doing a getc(), which
8948 * returns a single char read (or EOF), and prefills the buffer, and also
8949 * allows us to find out how full the buffer is. We use this information to
8950 * SvGROW() the sv to the size remaining in the buffer, after which we copy
8951 * the returned single char into the target sv, and then go back into scan
8952 * forward mode.
8953 *
8954 * 5. If we run out of write-buffer then we SvGROW() it by the size of the
8955 * remaining space in the read-buffer.
8956 *
8957 * Note that this code despite its twisty-turny nature is pretty darn slick.
8958 * It manages single byte separators, multi-byte cross boundary separators,
8959 * and cross-read-buffer separators cleanly and efficiently at the cost
8960 * of potentially greatly overallocating the target SV.
8961 *
8962 * Yves
8963 */
8964
8965
8966 /* get the number of bytes remaining in the read-ahead buffer
8967 * on first call on a given fp this will return 0.*/
8968 cnt = PerlIO_get_cnt(fp);
8969
8970 /* make sure we have the room */
8971 if ((I32)(SvLEN(sv) - append) <= cnt + 1) {
8972 /* Not room for all of it
8973 if we are looking for a separator and room for some
8974 */
8975 if (rslen && cnt > 80 && (I32)SvLEN(sv) > append) {
8976 /* just process what we have room for */
8977 shortbuffered = cnt - SvLEN(sv) + append + 1;
8978 cnt -= shortbuffered;
8979 }
8980 else {
8981 /* ensure that the target sv has enough room to hold
8982 * the rest of the read-ahead buffer */
8983 shortbuffered = 0;
8984 /* remember that cnt can be negative */
8985 SvGROW(sv, (STRLEN)(append + (cnt <= 0 ? 2 : (cnt + 1))));
8986 }
8987 }
8988 else {
8989 /* we have enough room to hold the full buffer, lets scream */
8990 shortbuffered = 0;
8991 }
8992
8993 /* extract the pointer to sv's string buffer, offset by append as necessary */
8994 bp = (STDCHAR*)SvPVX_const(sv) + append; /* move these two too to registers */
8995 /* extract the point to the read-ahead buffer */
8996 ptr = (STDCHAR*)PerlIO_get_ptr(fp);
8997
8998 /* some trace debug output */
8999 DEBUG_P(PerlIO_printf(Perl_debug_log,
9000 "Screamer: entering, ptr=%" UVuf ", cnt=%ld\n",PTR2UV(ptr),(long)cnt));
9001 DEBUG_P(PerlIO_printf(Perl_debug_log,
9002 "Screamer: entering: PerlIO * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%"
9003 UVuf "\n",
9004 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp),
9005 PTR2UV(PerlIO_has_base(fp) ? PerlIO_get_base(fp) : 0)));
9006
9007 for (;;) {
9008 screamer:
9009 /* if there is stuff left in the read-ahead buffer */
9010 if (cnt > 0) {
9011 /* if there is a separator */
9012 if (rslen) {
9013 /* find next rslast */
9014 STDCHAR *p;
9015
9016 /* shortcut common case of blank line */
9017 cnt--;
9018 if ((*bp++ = *ptr++) == rslast)
9019 goto thats_all_folks;
9020
9021 p = (STDCHAR *)memchr(ptr, rslast, cnt);
9022 if (p) {
9023 SSize_t got = p - ptr + 1;
9024 Copy(ptr, bp, got, STDCHAR);
9025 ptr += got;
9026 bp += got;
9027 cnt -= got;
9028 goto thats_all_folks;
9029 }
9030 Copy(ptr, bp, cnt, STDCHAR);
9031 ptr += cnt;
9032 bp += cnt;
9033 cnt = 0;
9034 }
9035 else {
9036 /* no separator, slurp the full buffer */
9037 Copy(ptr, bp, cnt, char); /* this | eat */
9038 bp += cnt; /* screams | dust */
9039 ptr += cnt; /* louder | sed :-) */
9040 cnt = 0;
9041 assert (!shortbuffered);
9042 goto cannot_be_shortbuffered;
9043 }
9044 }
9045
9046 if (shortbuffered) { /* oh well, must extend */
9047 /* we didn't have enough room to fit the line into the target buffer
9048 * so we must extend the target buffer and keep going */
9049 cnt = shortbuffered;
9050 shortbuffered = 0;
9051 bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */
9052 SvCUR_set(sv, bpx);
9053 /* extned the target sv's buffer so it can hold the full read-ahead buffer */
9054 SvGROW(sv, SvLEN(sv) + append + cnt + 2);
9055 bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */
9056 continue;
9057 }
9058
9059 cannot_be_shortbuffered:
9060 /* we need to refill the read-ahead buffer if possible */
9061
9062 DEBUG_P(PerlIO_printf(Perl_debug_log,
9063 "Screamer: going to getc, ptr=%" UVuf ", cnt=%" IVdf "\n",
9064 PTR2UV(ptr),(IV)cnt));
9065 PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* deregisterize cnt and ptr */
9066
9067 DEBUG_Pv(PerlIO_printf(Perl_debug_log,
9068 "Screamer: pre: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n",
9069 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp),
9070 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0)));
9071
9072 /*
9073 call PerlIO_getc() to let it prefill the lookahead buffer
9074
9075 This used to call 'filbuf' in stdio form, but as that behaves like
9076 getc when cnt <= 0 we use PerlIO_getc here to avoid introducing
9077 another abstraction.
9078
9079 Note we have to deal with the char in 'i' if we are not at EOF
9080 */
9081 bpx = bp - (STDCHAR*)SvPVX_const(sv);
9082 /* signals might be called here, possibly modifying sv */
9083 i = PerlIO_getc(fp); /* get more characters */
9084 bp = (STDCHAR*)SvPVX_const(sv) + bpx;
9085
9086 DEBUG_Pv(PerlIO_printf(Perl_debug_log,
9087 "Screamer: post: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n",
9088 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp),
9089 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0)));
9090
9091 /* find out how much is left in the read-ahead buffer, and rextract its pointer */
9092 cnt = PerlIO_get_cnt(fp);
9093 ptr = (STDCHAR*)PerlIO_get_ptr(fp); /* reregisterize cnt and ptr */
9094 DEBUG_P(PerlIO_printf(Perl_debug_log,
9095 "Screamer: after getc, ptr=%" UVuf ", cnt=%" IVdf "\n",
9096 PTR2UV(ptr),(IV)cnt));
9097
9098 if (i == EOF) /* all done for ever? */
9099 goto thats_really_all_folks;
9100
9101 /* make sure we have enough space in the target sv */
9102 bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */
9103 SvCUR_set(sv, bpx);
9104 SvGROW(sv, bpx + cnt + 2);
9105 bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */
9106
9107 /* copy of the char we got from getc() */
9108 *bp++ = (STDCHAR)i; /* store character from PerlIO_getc */
9109
9110 /* make sure we deal with the i being the last character of a separator */
9111 if (rslen && (STDCHAR)i == rslast) /* all done for now? */
9112 goto thats_all_folks;
9113 }
9114
9115 thats_all_folks:
9116 /* check if we have actually found the separator - only really applies
9117 * when rslen > 1 */
9118 if ((rslen > 1 && (STRLEN)(bp - (STDCHAR*)SvPVX_const(sv)) < rslen) ||
9119 memNE((char*)bp - rslen, rsptr, rslen))
9120 goto screamer; /* go back to the fray */
9121 thats_really_all_folks:
9122 if (shortbuffered)
9123 cnt += shortbuffered;
9124 DEBUG_P(PerlIO_printf(Perl_debug_log,
9125 "Screamer: quitting, ptr=%" UVuf ", cnt=%" IVdf "\n",PTR2UV(ptr),(IV)cnt));
9126 PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* put these back or we're in trouble */
9127 DEBUG_P(PerlIO_printf(Perl_debug_log,
9128 "Screamer: end: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf
9129 "\n",
9130 PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp),
9131 PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0)));
9132 *bp = '\0';
9133 SvCUR_set(sv, bp - (STDCHAR*)SvPVX_const(sv)); /* set length */
9134 DEBUG_P(PerlIO_printf(Perl_debug_log,
9135 "Screamer: done, len=%ld, string=|%.*s|\n",
9136 (long)SvCUR(sv),(int)SvCUR(sv),SvPVX_const(sv)));
9137 }
9138 else
9139 {
9140 /*The big, slow, and stupid way. */
9141 STDCHAR buf[8192];
9142
9143 screamer2:
9144 if (rslen) {
9145 const STDCHAR * const bpe = buf + sizeof(buf);
9146 bp = buf;
9147 while ((i = PerlIO_getc(fp)) != EOF && (*bp++ = (STDCHAR)i) != rslast && bp < bpe)
9148 ; /* keep reading */
9149 cnt = bp - buf;
9150 }
9151 else {
9152 cnt = PerlIO_read(fp,(char*)buf, sizeof(buf));
9153 /* Accommodate broken VAXC compiler, which applies U8 cast to
9154 * both args of ?: operator, causing EOF to change into 255
9155 */
9156 if (cnt > 0)
9157 i = (U8)buf[cnt - 1];
9158 else
9159 i = EOF;
9160 }
9161
9162 if (cnt < 0)
9163 cnt = 0; /* we do need to re-set the sv even when cnt <= 0 */
9164 if (append)
9165 sv_catpvn_nomg(sv, (char *) buf, cnt);
9166 else
9167 sv_setpvn(sv, (char *) buf, cnt); /* "nomg" is implied */
9168
9169 if (i != EOF && /* joy */
9170 (!rslen ||
9171 SvCUR(sv) < rslen ||
9172 memNE(SvPVX_const(sv) + SvCUR(sv) - rslen, rsptr, rslen)))
9173 {
9174 append = -1;
9175 /*
9176 * If we're reading from a TTY and we get a short read,
9177 * indicating that the user hit his EOF character, we need
9178 * to notice it now, because if we try to read from the TTY
9179 * again, the EOF condition will disappear.
9180 *
9181 * The comparison of cnt to sizeof(buf) is an optimization
9182 * that prevents unnecessary calls to feof().
9183 *
9184 * - jik 9/25/96
9185 */
9186 if (!(cnt < (I32)sizeof(buf) && PerlIO_eof(fp)))
9187 goto screamer2;
9188 }
9189
9190 }
9191
9192 if (rspara) { /* have to do this both before and after */
9193 while (i != EOF) { /* to make sure file boundaries work right */
9194 i = PerlIO_getc(fp);
9195 if (i != '\n') {
9196 PerlIO_ungetc(fp,i);
9197 break;
9198 }
9199 }
9200 }
9201
9202 return (SvCUR(sv) - append) ? SvPVX(sv) : NULL;
9203 }
9204
9205 /*
9206 =for apidoc sv_inc
9207 =for apidoc_item sv_inc_nomg
9208
9209 These auto-increment the value in the SV, doing string to numeric conversion
9210 if necessary. They both handle operator overloading.
9211
9212 They differ only in that C<sv_inc> performs 'get' magic; C<sv_inc_nomg> skips
9213 any magic.
9214
9215 =cut
9216 */
9217
9218 void
9219 Perl_sv_inc(pTHX_ SV *const sv)
9220 {
9221 if (!sv)
9222 return;
9223 SvGETMAGIC(sv);
9224 sv_inc_nomg(sv);
9225 }
9226
9227 void
9228 Perl_sv_inc_nomg(pTHX_ SV *const sv)
9229 {
9230 char *d;
9231 int flags;
9232
9233 if (!sv)
9234 return;
9235 if (SvTHINKFIRST(sv)) {
9236 if (SvREADONLY(sv)) {
9237 Perl_croak_no_modify();
9238 }
9239 if (SvROK(sv)) {
9240 IV i;
9241 if (SvAMAGIC(sv) && AMG_CALLunary(sv, inc_amg))
9242 return;
9243 i = PTR2IV(SvRV(sv));
9244 sv_unref(sv);
9245 sv_setiv(sv, i);
9246 }
9247 else sv_force_normal_flags(sv, 0);
9248 }
9249 flags = SvFLAGS(sv);
9250 if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) {
9251 /* It's (privately or publicly) a float, but not tested as an
9252 integer, so test it to see. */
9253 (void) SvIV(sv);
9254 flags = SvFLAGS(sv);
9255 }
9256 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
9257 /* It's publicly an integer, or privately an integer-not-float */
9258 #ifdef PERL_PRESERVE_IVUV
9259 oops_its_int:
9260 #endif
9261 if (SvIsUV(sv)) {
9262 if (SvUVX(sv) == UV_MAX)
9263 sv_setnv(sv, UV_MAX_P1);
9264 else {
9265 (void)SvIOK_only_UV(sv);
9266 SvUV_set(sv, SvUVX(sv) + 1);
9267 }
9268 } else {
9269 if (SvIVX(sv) == IV_MAX)
9270 sv_setuv(sv, (UV)IV_MAX + 1);
9271 else {
9272 (void)SvIOK_only(sv);
9273 SvIV_set(sv, SvIVX(sv) + 1);
9274 }
9275 }
9276 return;
9277 }
9278 if (flags & SVp_NOK) {
9279 const NV was = SvNVX(sv);
9280 if (NV_OVERFLOWS_INTEGERS_AT != 0.0 &&
9281 /* If NVX was NaN, the following comparisons return always false */
9282 UNLIKELY(was >= NV_OVERFLOWS_INTEGERS_AT ||
9283 was < -NV_OVERFLOWS_INTEGERS_AT) &&
9284 #if defined(NAN_COMPARE_BROKEN)
9285 LIKELY(!Perl_isinfnan(was))
9286 #else
9287 LIKELY(!Perl_isinf(was))
9288 #endif
9289 ) {
9290 /* diag_listed_as: Lost precision when %s %f by 1 */
9291 Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION),
9292 "Lost precision when incrementing %" NVff " by 1",
9293 was);
9294 }
9295 (void)SvNOK_only(sv);
9296 SvNV_set(sv, was + 1.0);
9297 return;
9298 }
9299
9300 /* treat AV/HV/CV/FM/IO and non-fake GVs as immutable */
9301 if (SvTYPE(sv) >= SVt_PVAV || (isGV_with_GP(sv) && !SvFAKE(sv)))
9302 Perl_croak_no_modify();
9303
9304 if (!(flags & SVp_POK) || !*SvPVX_const(sv)) {
9305 if ((flags & SVTYPEMASK) < SVt_PVIV)
9306 sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV ? SVt_PVIV : SVt_IV));
9307 (void)SvIOK_only(sv);
9308 SvIV_set(sv, 1);
9309 return;
9310 }
9311 d = SvPVX(sv);
9312 while (isALPHA(*d)) d++;
9313 while (isDIGIT(*d)) d++;
9314 if (d < SvEND(sv)) {
9315 const int numtype = grok_number_flags(SvPVX_const(sv), SvCUR(sv), NULL, PERL_SCAN_TRAILING);
9316 #ifdef PERL_PRESERVE_IVUV
9317 /* Got to punt this as an integer if needs be, but we don't issue
9318 warnings. Probably ought to make the sv_iv_please() that does
9319 the conversion if possible, and silently. */
9320 if (numtype && !(numtype & IS_NUMBER_INFINITY)) {
9321 /* Need to try really hard to see if it's an integer.
9322 9.22337203685478e+18 is an integer.
9323 but "9.22337203685478e+18" + 0 is UV=9223372036854779904
9324 so $a="9.22337203685478e+18"; $a+0; $a++
9325 needs to be the same as $a="9.22337203685478e+18"; $a++
9326 or we go insane. */
9327
9328 (void) sv_2iv(sv);
9329 if (SvIOK(sv))
9330 goto oops_its_int;
9331
9332 /* sv_2iv *should* have made this an NV */
9333 if (flags & SVp_NOK) {
9334 (void)SvNOK_only(sv);
9335 SvNV_set(sv, SvNVX(sv) + 1.0);
9336 return;
9337 }
9338 /* I don't think we can get here. Maybe I should assert this
9339 And if we do get here I suspect that sv_setnv will croak. NWC
9340 Fall through. */
9341 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n",
9342 SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
9343 }
9344 #endif /* PERL_PRESERVE_IVUV */
9345 if (!numtype && ckWARN(WARN_NUMERIC))
9346 not_incrementable(sv);
9347 sv_setnv(sv,Atof(SvPVX_const(sv)) + 1.0);
9348 return;
9349 }
9350 d--;
9351 while (d >= SvPVX_const(sv)) {
9352 if (isDIGIT(*d)) {
9353 if (++*d <= '9')
9354 return;
9355 *(d--) = '0';
9356 }
9357 else {
9358 #ifdef EBCDIC
9359 /* MKS: The original code here died if letters weren't consecutive.
9360 * at least it didn't have to worry about non-C locales. The
9361 * new code assumes that ('z'-'a')==('Z'-'A'), letters are
9362 * arranged in order (although not consecutively) and that only
9363 * [A-Za-z] are accepted by isALPHA in the C locale.
9364 */
9365 if (isALPHA_FOLD_NE(*d, 'z')) {
9366 do { ++*d; } while (!isALPHA(*d));
9367 return;
9368 }
9369 *(d--) -= 'z' - 'a';
9370 #else
9371 ++*d;
9372 if (isALPHA(*d))
9373 return;
9374 *(d--) -= 'z' - 'a' + 1;
9375 #endif
9376 }
9377 }
9378 /* oh,oh, the number grew */
9379 SvGROW(sv, SvCUR(sv) + 2);
9380 SvCUR_set(sv, SvCUR(sv) + 1);
9381 for (d = SvPVX(sv) + SvCUR(sv); d > SvPVX_const(sv); d--)
9382 *d = d[-1];
9383 if (isDIGIT(d[1]))
9384 *d = '1';
9385 else
9386 *d = d[1];
9387 }
9388
9389 /*
9390 =for apidoc sv_dec
9391 =for apidoc_item sv_dec_nomg
9392
9393 These auto-decrement the value in the SV, doing string to numeric conversion
9394 if necessary. They both handle operator overloading.
9395
9396 They differ only in that:
9397
9398 C<sv_dec> handles 'get' magic; C<sv_dec_nomg> skips 'get' magic.
9399
9400 =cut
9401 */
9402
9403 void
9404 Perl_sv_dec(pTHX_ SV *const sv)
9405 {
9406 if (!sv)
9407 return;
9408 SvGETMAGIC(sv);
9409 sv_dec_nomg(sv);
9410 }
9411
9412 void
9413 Perl_sv_dec_nomg(pTHX_ SV *const sv)
9414 {
9415 int flags;
9416
9417 if (!sv)
9418 return;
9419 if (SvTHINKFIRST(sv)) {
9420 if (SvREADONLY(sv)) {
9421 Perl_croak_no_modify();
9422 }
9423 if (SvROK(sv)) {
9424 IV i;
9425 if (SvAMAGIC(sv) && AMG_CALLunary(sv, dec_amg))
9426 return;
9427 i = PTR2IV(SvRV(sv));
9428 sv_unref(sv);
9429 sv_setiv(sv, i);
9430 }
9431 else sv_force_normal_flags(sv, 0);
9432 }
9433 /* Unlike sv_inc we don't have to worry about string-never-numbers
9434 and keeping them magic. But we mustn't warn on punting */
9435 flags = SvFLAGS(sv);
9436 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
9437 /* It's publicly an integer, or privately an integer-not-float */
9438 #ifdef PERL_PRESERVE_IVUV
9439 oops_its_int:
9440 #endif
9441 if (SvIsUV(sv)) {
9442 if (SvUVX(sv) == 0) {
9443 (void)SvIOK_only(sv);
9444 SvIV_set(sv, -1);
9445 }
9446 else {
9447 (void)SvIOK_only_UV(sv);
9448 SvUV_set(sv, SvUVX(sv) - 1);
9449 }
9450 } else {
9451 if (SvIVX(sv) == IV_MIN) {
9452 sv_setnv(sv, (NV)IV_MIN);
9453 goto oops_its_num;
9454 }
9455 else {
9456 (void)SvIOK_only(sv);
9457 SvIV_set(sv, SvIVX(sv) - 1);
9458 }
9459 }
9460 return;
9461 }
9462 if (flags & SVp_NOK) {
9463 oops_its_num:
9464 {
9465 const NV was = SvNVX(sv);
9466 if (NV_OVERFLOWS_INTEGERS_AT != 0.0 &&
9467 /* If NVX was NaN, these comparisons return always false */
9468 UNLIKELY(was <= -NV_OVERFLOWS_INTEGERS_AT ||
9469 was > NV_OVERFLOWS_INTEGERS_AT) &&
9470 #if defined(NAN_COMPARE_BROKEN)
9471 LIKELY(!Perl_isinfnan(was))
9472 #else
9473 LIKELY(!Perl_isinf(was))
9474 #endif
9475 ) {
9476 /* diag_listed_as: Lost precision when %s %f by 1 */
9477 Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION),
9478 "Lost precision when decrementing %" NVff " by 1",
9479 was);
9480 }
9481 (void)SvNOK_only(sv);
9482 SvNV_set(sv, was - 1.0);
9483 return;
9484 }
9485 }
9486
9487 /* treat AV/HV/CV/FM/IO and non-fake GVs as immutable */
9488 if (SvTYPE(sv) >= SVt_PVAV || (isGV_with_GP(sv) && !SvFAKE(sv)))
9489 Perl_croak_no_modify();
9490
9491 if (!(flags & SVp_POK)) {
9492 if ((flags & SVTYPEMASK) < SVt_PVIV)
9493 sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV) ? SVt_PVIV : SVt_IV);
9494 SvIV_set(sv, -1);
9495 (void)SvIOK_only(sv);
9496 return;
9497 }
9498 #ifdef PERL_PRESERVE_IVUV
9499 {
9500 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), NULL);
9501 if (numtype && !(numtype & IS_NUMBER_INFINITY)) {
9502 /* Need to try really hard to see if it's an integer.
9503 9.22337203685478e+18 is an integer.
9504 but "9.22337203685478e+18" + 0 is UV=9223372036854779904
9505 so $a="9.22337203685478e+18"; $a+0; $a--
9506 needs to be the same as $a="9.22337203685478e+18"; $a--
9507 or we go insane. */
9508
9509 (void) sv_2iv(sv);
9510 if (SvIOK(sv))
9511 goto oops_its_int;
9512
9513 /* sv_2iv *should* have made this an NV */
9514 if (flags & SVp_NOK) {
9515 (void)SvNOK_only(sv);
9516 SvNV_set(sv, SvNVX(sv) - 1.0);
9517 return;
9518 }
9519 /* I don't think we can get here. Maybe I should assert this
9520 And if we do get here I suspect that sv_setnv will croak. NWC
9521 Fall through. */
9522 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n",
9523 SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
9524 }
9525 }
9526 #endif /* PERL_PRESERVE_IVUV */
9527 sv_setnv(sv,Atof(SvPVX_const(sv)) - 1.0); /* punt */
9528 }
9529
9530 /* this define is used to eliminate a chunk of duplicated but shared logic
9531 * it has the suffix __SV_C to signal that it isnt API, and isnt meant to be
9532 * used anywhere but here - yves
9533 */
9534 #define PUSH_EXTEND_MORTAL__SV_C(AnSv) \
9535 STMT_START { \
9536 SSize_t ix = ++PL_tmps_ix; \
9537 if (UNLIKELY(ix >= PL_tmps_max)) \
9538 ix = tmps_grow_p(ix); \
9539 PL_tmps_stack[ix] = (AnSv); \
9540 } STMT_END
9541
9542 /*
9543 =for apidoc sv_mortalcopy
9544
9545 Creates a new SV which is a copy of the original SV (using C<sv_setsv>).
9546 The new SV is marked as mortal. It will be destroyed "soon", either by an
9547 explicit call to C<FREETMPS>, or by an implicit call at places such as
9548 statement boundaries. See also C<L</sv_newmortal>> and C<L</sv_2mortal>>.
9549
9550 =for apidoc sv_mortalcopy_flags
9551
9552 Like C<sv_mortalcopy>, but the extra C<flags> are passed to the
9553 C<sv_setsv_flags>.
9554
9555 =cut
9556 */
9557
9558 /* Make a string that will exist for the duration of the expression
9559 * evaluation. Actually, it may have to last longer than that, but
9560 * hopefully we won't free it until it has been assigned to a
9561 * permanent location. */
9562
9563 SV *
9564 Perl_sv_mortalcopy_flags(pTHX_ SV *const oldstr, U32 flags)
9565 {
9566 SV *sv;
9567
9568 if (flags & SV_GMAGIC)
9569 SvGETMAGIC(oldstr); /* before new_SV, in case it dies */
9570 new_SV(sv);
9571 sv_setsv_flags(sv,oldstr,flags & ~SV_GMAGIC);
9572 PUSH_EXTEND_MORTAL__SV_C(sv);
9573 SvTEMP_on(sv);
9574 return sv;
9575 }
9576
9577 /*
9578 =for apidoc sv_newmortal
9579
9580 Creates a new null SV which is mortal. The reference count of the SV is
9581 set to 1. It will be destroyed "soon", either by an explicit call to
9582 C<FREETMPS>, or by an implicit call at places such as statement boundaries.
9583 See also C<L</sv_mortalcopy>> and C<L</sv_2mortal>>.
9584
9585 =cut
9586 */
9587
9588 SV *
9589 Perl_sv_newmortal(pTHX)
9590 {
9591 SV *sv;
9592
9593 new_SV(sv);
9594 SvFLAGS(sv) = SVs_TEMP;
9595 PUSH_EXTEND_MORTAL__SV_C(sv);
9596 return sv;
9597 }
9598
9599
9600 /*
9601 =for apidoc newSVpvn_flags
9602
9603 Creates a new SV and copies a string (which may contain C<NUL> (C<\0>)
9604 characters) into it. The reference count for the
9605 SV is set to 1. Note that if C<len> is zero, Perl will create a zero length
9606 string. You are responsible for ensuring that the source string is at least
9607 C<len> bytes long. If the C<s> argument is NULL the new SV will be undefined.
9608 Currently the only flag bits accepted are C<SVf_UTF8> and C<SVs_TEMP>.
9609 If C<SVs_TEMP> is set, then C<sv_2mortal()> is called on the result before
9610 returning. If C<SVf_UTF8> is set, C<s>
9611 is considered to be in UTF-8 and the
9612 C<SVf_UTF8> flag will be set on the new SV.
9613 C<newSVpvn_utf8()> is a convenience wrapper for this function, defined as
9614
9615 #define newSVpvn_utf8(s, len, u) \
9616 newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
9617
9618 =for apidoc Amnh||SVs_TEMP
9619
9620 =cut
9621 */
9622
9623 SV *
9624 Perl_newSVpvn_flags(pTHX_ const char *const s, const STRLEN len, const U32 flags)
9625 {
9626 SV *sv;
9627
9628 /* All the flags we don't support must be zero.
9629 And we're new code so I'm going to assert this from the start. */
9630 assert(!(flags & ~(SVf_UTF8|SVs_TEMP)));
9631 sv = newSV_type(SVt_PV);
9632 sv_setpvn_fresh(sv,s,len);
9633
9634 /* This code used to do a sv_2mortal(), however we now unroll the call to
9635 * sv_2mortal() and do what it does ourselves here. Since we have asserted
9636 * that flags can only have the SVf_UTF8 and/or SVs_TEMP flags set above we
9637 * can use it to enable the sv flags directly (bypassing SvTEMP_on), which
9638 * in turn means we don't need to mask out the SVf_UTF8 flag below, which
9639 * means that we eliminate quite a few steps than it looks - Yves
9640 * (explaining patch by gfx) */
9641
9642 SvFLAGS(sv) |= flags;
9643
9644 if(flags & SVs_TEMP){
9645 PUSH_EXTEND_MORTAL__SV_C(sv);
9646 }
9647
9648 return sv;
9649 }
9650
9651 /*
9652 =for apidoc sv_2mortal
9653
9654 Marks an existing SV as mortal. The SV will be destroyed "soon", either
9655 by an explicit call to C<FREETMPS>, or by an implicit call at places such as
9656 statement boundaries. C<SvTEMP()> is turned on which means that the SV's
9657 string buffer can be "stolen" if this SV is copied. See also
9658 C<L</sv_newmortal>> and C<L</sv_mortalcopy>>.
9659
9660 =cut
9661 */
9662
9663 SV *
9664 Perl_sv_2mortal(pTHX_ SV *const sv)
9665 {
9666 if (!sv)
9667 return sv;
9668 if (SvIMMORTAL(sv))
9669 return sv;
9670 PUSH_EXTEND_MORTAL__SV_C(sv);
9671 SvTEMP_on(sv);
9672 return sv;
9673 }
9674
9675 /*
9676 =for apidoc newSVpv
9677
9678 Creates a new SV and copies a string (which may contain C<NUL> (C<\0>)
9679 characters) into it. The reference count for the
9680 SV is set to 1. If C<len> is zero, Perl will compute the length using
9681 C<strlen()>, (which means if you use this option, that C<s> can't have embedded
9682 C<NUL> characters and has to have a terminating C<NUL> byte).
9683
9684 This function can cause reliability issues if you are likely to pass in
9685 empty strings that are not null terminated, because it will run
9686 strlen on the string and potentially run past valid memory.
9687
9688 Using L</newSVpvn> is a safer alternative for non C<NUL> terminated strings.
9689 For string literals use L</newSVpvs> instead. This function will work fine for
9690 C<NUL> terminated strings, but if you want to avoid the if statement on whether
9691 to call C<strlen> use C<newSVpvn> instead (calling C<strlen> yourself).
9692
9693 =cut
9694 */
9695
9696 SV *
9697 Perl_newSVpv(pTHX_ const char *const s, const STRLEN len)
9698 {
9699 SV *sv = newSV_type(SVt_PV);
9700 sv_setpvn_fresh(sv, s, len || s == NULL ? len : strlen(s));
9701 return sv;
9702 }
9703
9704 /*
9705 =for apidoc newSVpvn
9706
9707 Creates a new SV and copies a string into it, which may contain C<NUL> characters
9708 (C<\0>) and other binary data. The reference count for the SV is set to 1.
9709 Note that if C<len> is zero, Perl will create a zero length (Perl) string. You
9710 are responsible for ensuring that the source buffer is at least
9711 C<len> bytes long. If the C<buffer> argument is NULL the new SV will be
9712 undefined.
9713
9714 =cut
9715 */
9716
9717 SV *
9718 Perl_newSVpvn(pTHX_ const char *const buffer, const STRLEN len)
9719 {
9720 SV *sv = newSV_type(SVt_PV);
9721 sv_setpvn_fresh(sv,buffer,len);
9722 return sv;
9723 }
9724
9725 /*
9726 =for apidoc newSVhek_mortal
9727
9728 Creates a new mortal SV from the hash key structure. It will generate
9729 scalars that point to the shared string table where possible. Returns
9730 a new (undefined) SV if C<hek> is NULL.
9731
9732 This is more efficient than using sv_2mortal(newSVhek( ... ))
9733
9734 =cut
9735 */
9736
9737 SV *
9738 Perl_newSVhek_mortal(pTHX_ const HEK *const hek)
9739 {
9740 SV * const sv = newSVhek(hek);
9741 assert(sv);
9742 assert(!SvIMMORTAL(sv));
9743
9744 PUSH_EXTEND_MORTAL__SV_C(sv);
9745 SvTEMP_on(sv);
9746 return sv;
9747 }
9748
9749 /*
9750 =for apidoc newSVhek
9751
9752 Creates a new SV from the hash key structure. It will generate scalars that
9753 point to the shared string table where possible. Returns a new (undefined)
9754 SV if C<hek> is NULL.
9755
9756 =cut
9757 */
9758
9759 SV *
9760 Perl_newSVhek(pTHX_ const HEK *const hek)
9761 {
9762 if (!hek) {
9763 SV *sv;
9764
9765 new_SV(sv);
9766 return sv;
9767 }
9768
9769 if (HEK_LEN(hek) == HEf_SVKEY) {
9770 return newSVsv(*(SV**)HEK_KEY(hek));
9771 } else {
9772 const int flags = HEK_FLAGS(hek);
9773 if (flags & HVhek_WASUTF8) {
9774 /* Trouble :-)
9775 Andreas would like keys he put in as utf8 to come back as utf8
9776 */
9777 STRLEN utf8_len = HEK_LEN(hek);
9778 SV * const sv = newSV_type(SVt_PV);
9779 char *as_utf8 = (char *)bytes_to_utf8 ((U8*)HEK_KEY(hek), &utf8_len);
9780 /* bytes_to_utf8() allocates a new string, which we can repurpose: */
9781 sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL);
9782 SvUTF8_on (sv);
9783 return sv;
9784 } else if (flags & HVhek_NOTSHARED) {
9785 /* A hash that isn't using shared hash keys has to have
9786 the flag in every key so that we know not to try to call
9787 share_hek_hek on it. */
9788
9789 SV * const sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
9790 if (HEK_UTF8(hek))
9791 SvUTF8_on (sv);
9792 return sv;
9793 }
9794 /* This will be overwhelmingly the most common case. */
9795 {
9796 /* Inline most of newSVpvn_share(), because share_hek_hek() is far
9797 more efficient than sharepvn(). */
9798 SV *sv = newSV_type(SVt_PV);
9799
9800 SvPV_set(sv, (char *)HEK_KEY(share_hek_hek(hek)));
9801 SvCUR_set(sv, HEK_LEN(hek));
9802 SvLEN_set(sv, 0);
9803 SvIsCOW_on(sv);
9804 SvPOK_on(sv);
9805 if (HEK_UTF8(hek))
9806 SvUTF8_on(sv);
9807 return sv;
9808 }
9809 }
9810 }
9811
9812 /*
9813 =for apidoc newSVpvn_share
9814
9815 Creates a new SV with its C<SvPVX_const> pointing to a shared string in the string
9816 table. If the string does not already exist in the table, it is
9817 created first. Turns on the C<SvIsCOW> flag (or C<READONLY>
9818 and C<FAKE> in 5.16 and earlier). If the C<hash> parameter
9819 is non-zero, that value is used; otherwise the hash is computed.
9820 The string's hash can later be retrieved from the SV
9821 with the C<L</SvSHARED_HASH>> macro. The idea here is
9822 that as the string table is used for shared hash keys these strings will have
9823 C<SvPVX_const == HeKEY> and hash lookup will avoid string compare.
9824
9825 =cut
9826 */
9827
9828 SV *
9829 Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash)
9830 {
9831 SV *sv;
9832 bool is_utf8 = FALSE;
9833 const char *const orig_src = src;
9834
9835 if (len < 0) {
9836 STRLEN tmplen = -len;
9837 is_utf8 = TRUE;
9838 /* See the note in hv.c:hv_fetch() --jhi */
9839 src = (char*)bytes_from_utf8((const U8*)src, &tmplen, &is_utf8);
9840 len = tmplen;
9841 }
9842 if (!hash)
9843 PERL_HASH(hash, src, len);
9844 sv = newSV_type(SVt_PV);
9845 /* The logic for this is inlined in S_mro_get_linear_isa_dfs(), so if it
9846 changes here, update it there too. */
9847 SvPV_set(sv, sharepvn(src, is_utf8?-len:len, hash));
9848 SvCUR_set(sv, len);
9849 SvLEN_set(sv, 0);
9850 SvIsCOW_on(sv);
9851 SvPOK_on(sv);
9852 if (is_utf8)
9853 SvUTF8_on(sv);
9854 if (src != orig_src)
9855 Safefree(src);
9856 return sv;
9857 }
9858
9859 /*
9860 =for apidoc newSVpv_share
9861
9862 Like C<newSVpvn_share>, but takes a C<NUL>-terminated string instead of a
9863 string/length pair.
9864
9865 =cut
9866 */
9867
9868 SV *
9869 Perl_newSVpv_share(pTHX_ const char *src, U32 hash)
9870 {
9871 return newSVpvn_share(src, strlen(src), hash);
9872 }
9873
9874 #if defined(MULTIPLICITY)
9875
9876 /* pTHX_ magic can't cope with varargs, so this is a no-context
9877 * version of the main function, (which may itself be aliased to us).
9878 * Don't access this version directly.
9879 */
9880
9881 SV *
9882 Perl_newSVpvf_nocontext(const char *const pat, ...)
9883 {
9884 dTHX;
9885 SV *sv;
9886 va_list args;
9887
9888 PERL_ARGS_ASSERT_NEWSVPVF_NOCONTEXT;
9889
9890 va_start(args, pat);
9891 sv = vnewSVpvf(pat, &args);
9892 va_end(args);
9893 return sv;
9894 }
9895 #endif
9896
9897 /*
9898 =for apidoc newSVpvf
9899
9900 Creates a new SV and initializes it with the string formatted like
9901 C<sv_catpvf>.
9902
9903 =for apidoc newSVpvf_nocontext
9904 Like C<L</newSVpvf>> but does not take a thread context (C<aTHX>) parameter,
9905 so is used in situations where the caller doesn't already have the thread
9906 context.
9907
9908 =for apidoc vnewSVpvf
9909 Like C<L</newSVpvf>> but the arguments are an encapsulated argument list.
9910
9911 =cut
9912 */
9913
9914 SV *
9915 Perl_newSVpvf(pTHX_ const char *const pat, ...)
9916 {
9917 SV *sv;
9918 va_list args;
9919
9920 PERL_ARGS_ASSERT_NEWSVPVF;
9921
9922 va_start(args, pat);
9923 sv = vnewSVpvf(pat, &args);
9924 va_end(args);
9925 return sv;
9926 }
9927
9928 /* backend for newSVpvf() and newSVpvf_nocontext() */
9929
9930 SV *
9931 Perl_vnewSVpvf(pTHX_ const char *const pat, va_list *const args)
9932 {
9933 SV *sv;
9934
9935 PERL_ARGS_ASSERT_VNEWSVPVF;
9936
9937 sv = newSV(1);
9938 SvPVCLEAR_FRESH(sv);
9939 sv_vcatpvfn_flags(sv, pat, strlen(pat), args, NULL, 0, NULL, 0);
9940 return sv;
9941 }
9942
9943 /*
9944 =for apidoc newSVnv
9945
9946 Creates a new SV and copies a floating point value into it.
9947 The reference count for the SV is set to 1.
9948
9949 =cut
9950 */
9951
9952 SV *
9953 Perl_newSVnv(pTHX_ const NV n)
9954 {
9955 SV *sv = newSV_type(SVt_NV);
9956 (void)SvNOK_on(sv);
9957
9958 SvNV_set(sv, n);
9959 SvTAINT(sv);
9960
9961 return sv;
9962 }
9963
9964 /*
9965 =for apidoc newSViv
9966
9967 Creates a new SV and copies an integer into it. The reference count for the
9968 SV is set to 1.
9969
9970 =cut
9971 */
9972
9973 SV *
9974 Perl_newSViv(pTHX_ const IV i)
9975 {
9976 SV *sv = newSV_type(SVt_IV);
9977 (void)SvIOK_on(sv);
9978
9979 SvIV_set(sv, i);
9980 SvTAINT(sv);
9981
9982 return sv;
9983 }
9984
9985 /*
9986 =for apidoc newSVuv
9987
9988 Creates a new SV and copies an unsigned integer into it.
9989 The reference count for the SV is set to 1.
9990
9991 =cut
9992 */
9993
9994 SV *
9995 Perl_newSVuv(pTHX_ const UV u)
9996 {
9997 SV *sv;
9998
9999 /* Inlining ONLY the small relevant subset of sv_setuv here
10000 * for performance. Makes a significant difference. */
10001
10002 /* Using ivs is more efficient than using uvs - see sv_setuv */
10003 if (u <= (UV)IV_MAX) {
10004 return newSViv((IV)u);
10005 }
10006
10007 new_SV(sv);
10008
10009 /* We're starting from SVt_FIRST, so provided that's
10010 * actual 0, we don't have to unset any SV type flags
10011 * to promote to SVt_IV. */
10012 STATIC_ASSERT_STMT(SVt_FIRST == 0);
10013
10014 SET_SVANY_FOR_BODYLESS_IV(sv);
10015 SvFLAGS(sv) |= SVt_IV;
10016 (void)SvIOK_on(sv);
10017 (void)SvIsUV_on(sv);
10018
10019 SvUV_set(sv, u);
10020 SvTAINT(sv);
10021
10022 return sv;
10023 }
10024
10025 /*
10026 =for apidoc newSVbool
10027
10028 Creates a new SV boolean.
10029
10030 =cut
10031 */
10032
10033 SV *
10034 Perl_newSVbool(pTHX_ bool bool_val)
10035 {
10036 PERL_ARGS_ASSERT_NEWSVBOOL;
10037 SV *sv = newSVsv(bool_val ? &PL_sv_yes : &PL_sv_no);
10038
10039 return sv;
10040 }
10041
10042 /*
10043 =for apidoc newSV_true
10044
10045 Creates a new SV that is a boolean true.
10046
10047 =cut
10048 */
10049 SV *
10050 Perl_newSV_true(pTHX)
10051 {
10052 PERL_ARGS_ASSERT_NEWSV_TRUE;
10053 SV *sv = newSVsv(&PL_sv_yes);
10054
10055 return sv;
10056 }
10057
10058 /*
10059 =for apidoc newSV_false
10060
10061 Creates a new SV that is a boolean false.
10062
10063 =cut
10064 */
10065
10066 SV *
10067 Perl_newSV_false(pTHX)
10068 {
10069 PERL_ARGS_ASSERT_NEWSV_FALSE;
10070 SV *sv = newSVsv(&PL_sv_no);
10071
10072 return sv;
10073 }
10074
10075 /* newRV_inc is the official function name to use now.
10076 * newRV_inc is in fact #defined to newRV in sv.h
10077 */
10078
10079 SV *
10080 Perl_newRV(pTHX_ SV *const sv)
10081 {
10082 PERL_ARGS_ASSERT_NEWRV;
10083
10084 return newRV_noinc(SvREFCNT_inc_simple_NN(sv));
10085 }
10086
10087 /*
10088 =for apidoc newSVsv
10089 =for apidoc_item newSVsv_flags
10090 =for apidoc_item newSVsv_nomg
10091
10092 These create a new SV which is an exact duplicate of the original SV
10093 (using C<sv_setsv>.)
10094
10095 They differ only in that C<newSVsv> performs 'get' magic; C<newSVsv_nomg> skips
10096 any magic; and C<newSVsv_flags> allows you to explicitly set a C<flags>
10097 parameter.
10098
10099 =cut
10100 */
10101
10102 SV *
10103 Perl_newSVsv_flags(pTHX_ SV *const old, I32 flags)
10104 {
10105 SV *sv;
10106
10107 if (!old)
10108 return NULL;
10109 if (SvIS_FREED(old)) {
10110 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), "semi-panic: attempt to dup freed string");
10111 return NULL;
10112 }
10113 /* Do this here, otherwise we leak the new SV if this croaks. */
10114 if (flags & SV_GMAGIC)
10115 SvGETMAGIC(old);
10116 new_SV(sv);
10117 sv_setsv_flags(sv, old, flags & ~SV_GMAGIC);
10118 return sv;
10119 }
10120
10121 /*
10122 =for apidoc sv_reset
10123
10124 Underlying implementation for the C<reset> Perl function.
10125 Note that the perl-level function is vaguely deprecated.
10126
10127 =cut
10128 */
10129
10130 void
10131 Perl_sv_reset(pTHX_ const char *s, HV *const stash)
10132 {
10133 PERL_ARGS_ASSERT_SV_RESET;
10134
10135 sv_resetpvn(*s ? s : NULL, strlen(s), stash);
10136 }
10137
10138 void
10139 Perl_sv_resetpvn(pTHX_ const char *s, STRLEN len, HV * const stash)
10140 {
10141 char todo[PERL_UCHAR_MAX+1];
10142 const char *send;
10143
10144 if (!stash || SvTYPE(stash) != SVt_PVHV)
10145 return;
10146
10147 if (!s) { /* reset ?? searches */
10148 MAGIC * const mg = mg_find((const SV *)stash, PERL_MAGIC_symtab);
10149 if (mg && mg->mg_len) {
10150 const U32 count = mg->mg_len / sizeof(PMOP**);
10151 PMOP **pmp = (PMOP**) mg->mg_ptr;
10152 PMOP *const *const end = pmp + count;
10153
10154 while (pmp < end) {
10155 #ifdef USE_ITHREADS
10156 SvREADONLY_off(PL_regex_pad[(*pmp)->op_pmoffset]);
10157 #else
10158 (*pmp)->op_pmflags &= ~PMf_USED;
10159 #endif
10160 ++pmp;
10161 }
10162 }
10163 return;
10164 }
10165
10166 /* reset variables */
10167
10168 if (!HvTOTALKEYS(stash))
10169 return;
10170
10171 Zero(todo, 256, char);
10172 send = s + len;
10173 while (s < send) {
10174 I32 max;
10175 I32 i = (unsigned char)*s;
10176 if (s[1] == '-') {
10177 s += 2;
10178 }
10179 max = (unsigned char)*s++;
10180 for ( ; i <= max; i++) {
10181 todo[i] = 1;
10182 }
10183 for (i = 0; i <= (I32) HvMAX(stash); i++) {
10184 HE *entry;
10185 for (entry = HvARRAY(stash)[i];
10186 entry;
10187 entry = HeNEXT(entry))
10188 {
10189 GV *gv;
10190 SV *sv;
10191
10192 if (!todo[(U8)*HeKEY(entry)])
10193 continue;
10194 gv = MUTABLE_GV(HeVAL(entry));
10195 if (!isGV(gv))
10196 continue;
10197 sv = GvSV(gv);
10198 if (sv && !SvREADONLY(sv)) {
10199 SV_CHECK_THINKFIRST_COW_DROP(sv);
10200 if (!isGV(sv)) {
10201 SvOK_off(sv);
10202 SvSETMAGIC(sv);
10203 }
10204 }
10205 if (GvAV(gv)) {
10206 av_clear(GvAV(gv));
10207 }
10208 if (GvHV(gv) && !HvHasNAME(GvHV(gv))) {
10209 hv_clear(GvHV(gv));
10210 }
10211 }
10212 }
10213 }
10214 }
10215
10216 /*
10217 =for apidoc sv_2io
10218
10219 Using various gambits, try to get an IO from an SV: the IO slot if its a
10220 GV; or the recursive result if we're an RV; or the IO slot of the symbol
10221 named after the PV if we're a string.
10222
10223 'Get' magic is ignored on the C<sv> passed in, but will be called on
10224 C<SvRV(sv)> if C<sv> is an RV.
10225
10226 =cut
10227 */
10228
10229 IO*
10230 Perl_sv_2io(pTHX_ SV *const sv)
10231 {
10232 IO* io;
10233 GV* gv;
10234
10235 PERL_ARGS_ASSERT_SV_2IO;
10236
10237 switch (SvTYPE(sv)) {
10238 case SVt_PVIO:
10239 io = MUTABLE_IO(sv);
10240 break;
10241 case SVt_PVGV:
10242 case SVt_PVLV:
10243 if (isGV_with_GP(sv)) {
10244 gv = MUTABLE_GV(sv);
10245 io = GvIO(gv);
10246 if (!io)
10247 Perl_croak(aTHX_ "Bad filehandle: %" HEKf,
10248 HEKfARG(GvNAME_HEK(gv)));
10249 break;
10250 }
10251 /* FALLTHROUGH */
10252 default:
10253 if (!SvOK(sv))
10254 Perl_croak(aTHX_ PL_no_usym, "filehandle");
10255 if (SvROK(sv)) {
10256 SvGETMAGIC(SvRV(sv));
10257 return sv_2io(SvRV(sv));
10258 }
10259 gv = gv_fetchsv_nomg(sv, 0, SVt_PVIO);
10260 if (gv)
10261 io = GvIO(gv);
10262 else
10263 io = 0;
10264 if (!io) {
10265 SV *newsv = sv;
10266 if (SvGMAGICAL(sv)) {
10267 newsv = sv_newmortal();
10268 sv_setsv_nomg(newsv, sv);
10269 }
10270 Perl_croak(aTHX_ "Bad filehandle: %" SVf, SVfARG(newsv));
10271 }
10272 break;
10273 }
10274 return io;
10275 }
10276
10277 /*
10278 =for apidoc sv_2cv
10279
10280 Using various gambits, try to get a CV from an SV; in addition, try if
10281 possible to set C<*st> and C<*gvp> to the stash and GV associated with it.
10282 The flags in C<lref> are passed to C<gv_fetchsv>.
10283
10284 =cut
10285 */
10286
10287 CV *
10288 Perl_sv_2cv(pTHX_ SV *sv, HV **const st, GV **const gvp, const I32 lref)
10289 {
10290 GV *gv = NULL;
10291 CV *cv = NULL;
10292
10293 PERL_ARGS_ASSERT_SV_2CV;
10294
10295 if (!sv) {
10296 *st = NULL;
10297 *gvp = NULL;
10298 return NULL;
10299 }
10300 switch (SvTYPE(sv)) {
10301 case SVt_PVCV:
10302 *st = CvSTASH(sv);
10303 *gvp = NULL;
10304 return MUTABLE_CV(sv);
10305 case SVt_PVHV:
10306 case SVt_PVAV:
10307 *st = NULL;
10308 *gvp = NULL;
10309 return NULL;
10310 default:
10311 SvGETMAGIC(sv);
10312 if (SvROK(sv)) {
10313 if (SvAMAGIC(sv))
10314 sv = amagic_deref_call(sv, to_cv_amg);
10315
10316 sv = SvRV(sv);
10317 if (SvTYPE(sv) == SVt_PVCV) {
10318 cv = MUTABLE_CV(sv);
10319 *gvp = NULL;
10320 *st = CvSTASH(cv);
10321 return cv;
10322 }
10323 else if(SvGETMAGIC(sv), isGV_with_GP(sv))
10324 gv = MUTABLE_GV(sv);
10325 else
10326 Perl_croak(aTHX_ "Not a subroutine reference");
10327 }
10328 else if (isGV_with_GP(sv)) {
10329 gv = MUTABLE_GV(sv);
10330 }
10331 else {
10332 gv = gv_fetchsv_nomg(sv, lref, SVt_PVCV);
10333 }
10334 *gvp = gv;
10335 if (!gv) {
10336 *st = NULL;
10337 return NULL;
10338 }
10339 /* Some flags to gv_fetchsv mean don't really create the GV */
10340 if (!isGV_with_GP(gv)) {
10341 *st = NULL;
10342 return NULL;
10343 }
10344 *st = GvESTASH(gv);
10345 if (lref & ~GV_ADDMG && !GvCVu(gv)) {
10346 /* XXX this is probably not what they think they're getting.
10347 * It has the same effect as "sub name;", i.e. just a forward
10348 * declaration! */
10349 newSTUB(gv,0);
10350 }
10351 return GvCVu(gv);
10352 }
10353 }
10354
10355 /*
10356 =for apidoc sv_true
10357
10358 Returns true if the SV has a true value by Perl's rules.
10359 Use the C<SvTRUE> macro instead, which may call C<sv_true()> or may
10360 instead use an in-line version.
10361
10362 =cut
10363 */
10364
10365 I32
10366 Perl_sv_true(pTHX_ SV *const sv)
10367 {
10368 if (!sv)
10369 return 0;
10370 if (SvPOK(sv)) {
10371 const XPV* const tXpv = (XPV*)SvANY(sv);
10372 if (tXpv &&
10373 (tXpv->xpv_cur > 1 ||
10374 (tXpv->xpv_cur && *sv->sv_u.svu_pv != '0')))
10375 return 1;
10376 else
10377 return 0;
10378 }
10379 else {
10380 if (SvIOK(sv))
10381 return SvIVX(sv) != 0;
10382 else {
10383 if (SvNOK(sv))
10384 return SvNVX(sv) != 0.0;
10385 else
10386 return sv_2bool(sv);
10387 }
10388 }
10389 }
10390
10391 /*
10392 =for apidoc sv_pvn_force
10393
10394 Get a sensible string out of the SV somehow.
10395 A private implementation of the C<SvPV_force> macro for compilers which
10396 can't cope with complex macro expressions. Always use the macro instead.
10397
10398 =for apidoc sv_pvn_force_flags
10399
10400 Get a sensible string out of the SV somehow.
10401 If C<flags> has the C<SV_GMAGIC> bit set, will C<L</mg_get>> on C<sv> if
10402 appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are
10403 implemented in terms of this function.
10404 You normally want to use the various wrapper macros instead: see
10405 C<L</SvPV_force>> and C<L</SvPV_force_nomg>>.
10406
10407 =cut
10408 */
10409
10410 char *
10411 Perl_sv_pvn_force_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags)
10412 {
10413 PERL_ARGS_ASSERT_SV_PVN_FORCE_FLAGS;
10414
10415 if (flags & SV_GMAGIC) SvGETMAGIC(sv);
10416 if (SvTHINKFIRST(sv) && (!SvROK(sv) || SvREADONLY(sv)))
10417 sv_force_normal_flags(sv, 0);
10418
10419 if (SvPOK(sv)) {
10420 if (lp)
10421 *lp = SvCUR(sv);
10422 }
10423 else {
10424 char *s;
10425 STRLEN len;
10426
10427 if (SvTYPE(sv) > SVt_PVLV
10428 || isGV_with_GP(sv))
10429 /* diag_listed_as: Can't coerce %s to %s in %s */
10430 Perl_croak(aTHX_ "Can't coerce %s to string in %s", sv_reftype(sv,0),
10431 OP_DESC(PL_op));
10432 s = sv_2pv_flags(sv, &len, flags &~ SV_GMAGIC);
10433 if (!s) {
10434 s = (char *)"";
10435 }
10436 if (lp)
10437 *lp = len;
10438
10439 if (SvTYPE(sv) < SVt_PV ||
10440 s != SvPVX_const(sv)) { /* Almost, but not quite, sv_setpvn() */
10441 if (SvROK(sv))
10442 sv_unref(sv);
10443 SvUPGRADE(sv, SVt_PV); /* Never FALSE */
10444 SvGROW(sv, len + 1);
10445 Move(s,SvPVX(sv),len,char);
10446 SvCUR_set(sv, len);
10447 SvPVX(sv)[len] = '\0';
10448 }
10449 if (!SvPOK(sv)) {
10450 SvPOK_on(sv); /* validate pointer */
10451 SvTAINT(sv);
10452 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n",
10453 PTR2UV(sv),SvPVX_const(sv)));
10454 }
10455 }
10456 (void)SvPOK_only_UTF8(sv);
10457 return SvPVX_mutable(sv);
10458 }
10459
10460 /*
10461 =for apidoc sv_pvbyten_force
10462
10463 The backend for the C<SvPVbytex_force> macro. Always use the macro
10464 instead. If the SV cannot be downgraded from UTF-8, this croaks.
10465
10466 =cut
10467 */
10468
10469 char *
10470 Perl_sv_pvbyten_force(pTHX_ SV *const sv, STRLEN *const lp)
10471 {
10472 PERL_ARGS_ASSERT_SV_PVBYTEN_FORCE;
10473
10474 sv_pvn_force(sv,lp);
10475 (void)sv_utf8_downgrade(sv,0);
10476 *lp = SvCUR(sv);
10477 return SvPVX(sv);
10478 }
10479
10480 /*
10481 =for apidoc sv_pvutf8n_force
10482
10483 The backend for the C<SvPVutf8x_force> macro. Always use the macro
10484 instead.
10485
10486 =cut
10487 */
10488
10489 char *
10490 Perl_sv_pvutf8n_force(pTHX_ SV *const sv, STRLEN *const lp)
10491 {
10492 PERL_ARGS_ASSERT_SV_PVUTF8N_FORCE;
10493
10494 sv_pvn_force(sv,0);
10495 sv_utf8_upgrade_nomg(sv);
10496 *lp = SvCUR(sv);
10497 return SvPVX(sv);
10498 }
10499
10500 /*
10501 =for apidoc sv_reftype
10502
10503 Returns a string describing what the SV is a reference to.
10504
10505 If ob is true and the SV is blessed, the string is the class name,
10506 otherwise it is the type of the SV, "SCALAR", "ARRAY" etc.
10507
10508 =cut
10509 */
10510
10511 const char *
10512 Perl_sv_reftype(pTHX_ const SV *const sv, const int ob)
10513 {
10514 PERL_ARGS_ASSERT_SV_REFTYPE;
10515 if (ob && SvOBJECT(sv)) {
10516 return SvPV_nolen_const(sv_ref(NULL, sv, ob));
10517 }
10518 else {
10519 /* WARNING - There is code, for instance in mg.c, that assumes that
10520 * the only reason that sv_reftype(sv,0) would return a string starting
10521 * with 'L' or 'S' is that it is a LVALUE or a SCALAR.
10522 * Yes this a dodgy way to do type checking, but it saves practically reimplementing
10523 * this routine inside other subs, and it saves time.
10524 * Do not change this assumption without searching for "dodgy type check" in
10525 * the code.
10526 * - Yves */
10527 switch (SvTYPE(sv)) {
10528 case SVt_NULL:
10529 case SVt_IV:
10530 case SVt_NV:
10531 case SVt_PV:
10532 case SVt_PVIV:
10533 case SVt_PVNV:
10534 case SVt_PVMG:
10535 if (SvVOK(sv))
10536 return "VSTRING";
10537 if (SvROK(sv))
10538 return "REF";
10539 else
10540 return "SCALAR";
10541
10542 case SVt_PVLV: return (char *) (SvROK(sv) ? "REF"
10543 /* tied lvalues should appear to be
10544 * scalars for backwards compatibility */
10545 : (isALPHA_FOLD_EQ(LvTYPE(sv), 't'))
10546 ? "SCALAR" : "LVALUE");
10547 case SVt_PVAV: return "ARRAY";
10548 case SVt_PVHV: return "HASH";
10549 case SVt_PVCV: return "CODE";
10550 case SVt_PVGV: return (char *) (isGV_with_GP(sv)
10551 ? "GLOB" : "SCALAR");
10552 case SVt_PVFM: return "FORMAT";
10553 case SVt_PVIO: return "IO";
10554 case SVt_INVLIST: return "INVLIST";
10555 case SVt_REGEXP: return "REGEXP";
10556 case SVt_PVOBJ: return "OBJECT";
10557 default: return "UNKNOWN";
10558 }
10559 }
10560 }
10561
10562 /*
10563 =for apidoc sv_ref
10564
10565 Returns a SV describing what the SV passed in is a reference to.
10566
10567 dst can be a SV to be set to the description or NULL, in which case a
10568 mortal SV is returned.
10569
10570 If ob is true and the SV is blessed, the description is the class
10571 name, otherwise it is the type of the SV, "SCALAR", "ARRAY" etc.
10572
10573 =cut
10574 */
10575
10576 SV *
10577 Perl_sv_ref(pTHX_ SV *dst, const SV *const sv, const int ob)
10578 {
10579 PERL_ARGS_ASSERT_SV_REF;
10580
10581 if (!dst)
10582 dst = sv_newmortal();
10583
10584 if (ob && SvOBJECT(sv)) {
10585 if (HvHasNAME(SvSTASH(sv)))
10586 sv_sethek(dst, HvNAME_HEK(SvSTASH(sv)));
10587 else
10588 sv_setpvs(dst, "__ANON__");
10589 }
10590 else {
10591 const char * reftype = sv_reftype(sv, 0);
10592 sv_setpv(dst, reftype);
10593 }
10594 return dst;
10595 }
10596
10597 /*
10598 =for apidoc sv_isobject
10599
10600 Returns a boolean indicating whether the SV is an RV pointing to a blessed
10601 object. If the SV is not an RV, or if the object is not blessed, then this
10602 will return false.
10603
10604 =cut
10605 */
10606
10607 int
10608 Perl_sv_isobject(pTHX_ SV *sv)
10609 {
10610 if (!sv)
10611 return 0;
10612 SvGETMAGIC(sv);
10613 if (!SvROK(sv))
10614 return 0;
10615 sv = SvRV(sv);
10616 if (!SvOBJECT(sv))
10617 return 0;
10618 return 1;
10619 }
10620
10621 /*
10622 =for apidoc sv_isa
10623
10624 Returns a boolean indicating whether the SV is blessed into the specified
10625 class.
10626
10627 This does not check for subtypes or method overloading. Use C<sv_isa_sv> to
10628 verify an inheritance relationship in the same way as the C<isa> operator by
10629 respecting any C<isa()> method overloading; or C<sv_derived_from_sv> to test
10630 directly on the actual object type.
10631
10632 =cut
10633 */
10634
10635 int
10636 Perl_sv_isa(pTHX_ SV *sv, const char *const name)
10637 {
10638 const char *hvname;
10639
10640 PERL_ARGS_ASSERT_SV_ISA;
10641
10642 if (!sv)
10643 return 0;
10644 SvGETMAGIC(sv);
10645 if (!SvROK(sv))
10646 return 0;
10647 sv = SvRV(sv);
10648 if (!SvOBJECT(sv))
10649 return 0;
10650 hvname = HvNAME_get(SvSTASH(sv));
10651 if (!hvname)
10652 return 0;
10653
10654 return strEQ(hvname, name);
10655 }
10656
10657 /*
10658 =for apidoc newSVrv
10659
10660 Creates a new SV for the existing RV, C<rv>, to point to. If C<rv> is not an
10661 RV then it will be upgraded to one. If C<classname> is non-null then the new
10662 SV will be blessed in the specified package. The new SV is returned and its
10663 reference count is 1. The reference count 1 is owned by C<rv>. See also
10664 newRV_inc() and newRV_noinc() for creating a new RV properly.
10665
10666 =cut
10667 */
10668
10669 SV*
10670 Perl_newSVrv(pTHX_ SV *const rv, const char *const classname)
10671 {
10672 SV *sv;
10673
10674 PERL_ARGS_ASSERT_NEWSVRV;
10675
10676 new_SV(sv);
10677
10678 SV_CHECK_THINKFIRST_COW_DROP(rv);
10679
10680 if (UNLIKELY( SvTYPE(rv) >= SVt_PVMG )) {
10681 const U32 refcnt = SvREFCNT(rv);
10682 SvREFCNT(rv) = 0;
10683 sv_clear(rv);
10684 SvFLAGS(rv) = 0;
10685 SvREFCNT(rv) = refcnt;
10686
10687 sv_upgrade(rv, SVt_IV);
10688 } else if (SvROK(rv)) {
10689 SvREFCNT_dec(SvRV(rv));
10690 } else {
10691 prepare_SV_for_RV(rv);
10692 }
10693
10694 SvOK_off(rv);
10695 SvRV_set(rv, sv);
10696 SvROK_on(rv);
10697
10698 if (classname) {
10699 HV* const stash = gv_stashpv(classname, GV_ADD);
10700 (void)sv_bless(rv, stash);
10701 }
10702 return sv;
10703 }
10704
10705 SV *
10706 Perl_newSVavdefelem(pTHX_ AV *av, SSize_t ix, bool extendible)
10707 {
10708 SV * const lv = newSV_type(SVt_PVLV);
10709 PERL_ARGS_ASSERT_NEWSVAVDEFELEM;
10710 LvTYPE(lv) = 'y';
10711 sv_magic(lv, NULL, PERL_MAGIC_defelem, NULL, 0);
10712 LvTARG(lv) = SvREFCNT_inc_simple_NN(av);
10713 LvSTARGOFF(lv) = ix;
10714 LvTARGLEN(lv) = extendible ? 1 : (STRLEN)UV_MAX;
10715 return lv;
10716 }
10717
10718 /*
10719 =for apidoc sv_setref_pv
10720
10721 Copies a pointer into a new SV, optionally blessing the SV. The C<rv>
10722 argument will be upgraded to an RV. That RV will be modified to point to
10723 the new SV. If the C<pv> argument is C<NULL>, then C<PL_sv_undef> will be placed
10724 into the SV. The C<classname> argument indicates the package for the
10725 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV
10726 will have a reference count of 1, and the RV will be returned.
10727
10728 Do not use with other Perl types such as HV, AV, SV, CV, because those
10729 objects will become corrupted by the pointer copy process.
10730
10731 Note that C<sv_setref_pvn> copies the string while this copies the pointer.
10732
10733 =cut
10734 */
10735
10736 SV*
10737 Perl_sv_setref_pv(pTHX_ SV *const rv, const char *const classname, void *const pv)
10738 {
10739 PERL_ARGS_ASSERT_SV_SETREF_PV;
10740
10741 if (!pv) {
10742 sv_set_undef(rv);
10743 SvSETMAGIC(rv);
10744 }
10745 else
10746 sv_setiv(newSVrv(rv,classname), PTR2IV(pv));
10747 return rv;
10748 }
10749
10750 /*
10751 =for apidoc sv_setref_iv
10752
10753 Copies an integer into a new SV, optionally blessing the SV. The C<rv>
10754 argument will be upgraded to an RV. That RV will be modified to point to
10755 the new SV. The C<classname> argument indicates the package for the
10756 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV
10757 will have a reference count of 1, and the RV will be returned.
10758
10759 =cut
10760 */
10761
10762 SV*
10763 Perl_sv_setref_iv(pTHX_ SV *const rv, const char *const classname, const IV iv)
10764 {
10765 PERL_ARGS_ASSERT_SV_SETREF_IV;
10766
10767 sv_setiv(newSVrv(rv,classname), iv);
10768 return rv;
10769 }
10770
10771 /*
10772 =for apidoc sv_setref_uv
10773
10774 Copies an unsigned integer into a new SV, optionally blessing the SV. The C<rv>
10775 argument will be upgraded to an RV. That RV will be modified to point to
10776 the new SV. The C<classname> argument indicates the package for the
10777 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV
10778 will have a reference count of 1, and the RV will be returned.
10779
10780 =cut
10781 */
10782
10783 SV*
10784 Perl_sv_setref_uv(pTHX_ SV *const rv, const char *const classname, const UV uv)
10785 {
10786 PERL_ARGS_ASSERT_SV_SETREF_UV;
10787
10788 sv_setuv(newSVrv(rv,classname), uv);
10789 return rv;
10790 }
10791
10792 /*
10793 =for apidoc sv_setref_nv
10794
10795 Copies a double into a new SV, optionally blessing the SV. The C<rv>
10796 argument will be upgraded to an RV. That RV will be modified to point to
10797 the new SV. The C<classname> argument indicates the package for the
10798 blessing. Set C<classname> to C<NULL> to avoid the blessing. The new SV
10799 will have a reference count of 1, and the RV will be returned.
10800
10801 =cut
10802 */
10803
10804 SV*
10805 Perl_sv_setref_nv(pTHX_ SV *const rv, const char *const classname, const NV nv)
10806 {
10807 PERL_ARGS_ASSERT_SV_SETREF_NV;
10808
10809 sv_setnv(newSVrv(rv,classname), nv);
10810 return rv;
10811 }
10812
10813 /*
10814 =for apidoc sv_setref_pvn
10815
10816 Copies a string into a new SV, optionally blessing the SV. The length of the
10817 string must be specified with C<n>. The C<rv> argument will be upgraded to
10818 an RV. That RV will be modified to point to the new SV. The C<classname>
10819 argument indicates the package for the blessing. Set C<classname> to
10820 C<NULL> to avoid the blessing. The new SV will have a reference count
10821 of 1, and the RV will be returned.
10822
10823 Note that C<sv_setref_pv> copies the pointer while this copies the string.
10824
10825 =cut
10826 */
10827
10828 SV*
10829 Perl_sv_setref_pvn(pTHX_ SV *const rv, const char *const classname,
10830 const char *const pv, const STRLEN n)
10831 {
10832 PERL_ARGS_ASSERT_SV_SETREF_PVN;
10833
10834 sv_setpvn(newSVrv(rv,classname), pv, n);
10835 return rv;
10836 }
10837
10838 /*
10839 =for apidoc sv_bless
10840
10841 Blesses an SV into a specified package. The SV must be an RV. The package
10842 must be designated by its stash (see C<L</gv_stashpv>>). The reference count
10843 of the SV is unaffected.
10844
10845 =cut
10846 */
10847
10848 SV*
10849 Perl_sv_bless(pTHX_ SV *const sv, HV *const stash)
10850 {
10851 SV *tmpRef;
10852 HV *oldstash = NULL;
10853
10854 PERL_ARGS_ASSERT_SV_BLESS;
10855
10856 SvGETMAGIC(sv);
10857 if (!SvROK(sv))
10858 Perl_croak(aTHX_ "Can't bless non-reference value");
10859 if (HvSTASH_IS_CLASS(stash))
10860 Perl_croak(aTHX_ "Attempt to bless into a class");
10861
10862 tmpRef = SvRV(sv);
10863 if (SvFLAGS(tmpRef) & (SVs_OBJECT|SVf_READONLY|SVf_PROTECT)) {
10864 if (SvREADONLY(tmpRef))
10865 Perl_croak_no_modify();
10866 if (SvTYPE(tmpRef) == SVt_PVOBJ)
10867 Perl_croak(aTHX_ "Can't bless an object reference");
10868 if (SvOBJECT(tmpRef)) {
10869 oldstash = SvSTASH(tmpRef);
10870 }
10871 }
10872 SvOBJECT_on(tmpRef);
10873 SvUPGRADE(tmpRef, SVt_PVMG);
10874 SvSTASH_set(tmpRef, MUTABLE_HV(SvREFCNT_inc_simple(stash)));
10875 SvREFCNT_dec(oldstash);
10876
10877 if(SvSMAGICAL(tmpRef))
10878 if(mg_find(tmpRef, PERL_MAGIC_ext) || mg_find(tmpRef, PERL_MAGIC_uvar))
10879 mg_set(tmpRef);
10880
10881
10882
10883 return sv;
10884 }
10885
10886 /* Downgrades a PVGV to a PVMG. If it's actually a PVLV, we leave the type
10887 * as it is after unglobbing it.
10888 */
10889
10890 PERL_STATIC_INLINE void
10891 S_sv_unglob(pTHX_ SV *const sv, U32 flags)
10892 {
10893 void *xpvmg;
10894 HV *stash;
10895 SV * const temp = flags & SV_COW_DROP_PV ? NULL : sv_newmortal();
10896
10897 PERL_ARGS_ASSERT_SV_UNGLOB;
10898
10899 assert(SvTYPE(sv) == SVt_PVGV || SvTYPE(sv) == SVt_PVLV);
10900 SvFAKE_off(sv);
10901 if (!(flags & SV_COW_DROP_PV))
10902 gv_efullname3(temp, MUTABLE_GV(sv), "*");
10903
10904 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv));
10905 if (GvGP(sv)) {
10906 if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv)))
10907 && HvHasNAME(stash))
10908 mro_method_changed_in(stash);
10909 gp_free(MUTABLE_GV(sv));
10910 }
10911 if (GvSTASH(sv)) {
10912 sv_del_backref(MUTABLE_SV(GvSTASH(sv)), sv);
10913 GvSTASH(sv) = NULL;
10914 }
10915 GvMULTI_off(sv);
10916 if (GvNAME_HEK(sv)) {
10917 unshare_hek(GvNAME_HEK(sv));
10918 }
10919 isGV_with_GP_off(sv);
10920
10921 if(SvTYPE(sv) == SVt_PVGV) {
10922 /* need to keep SvANY(sv) in the right arena */
10923 xpvmg = new_XPVMG();
10924 StructCopy(SvANY(sv), xpvmg, XPVMG);
10925 del_body_by_type(SvANY(sv), SVt_PVGV);
10926 SvANY(sv) = xpvmg;
10927
10928 SvFLAGS(sv) &= ~SVTYPEMASK;
10929 SvFLAGS(sv) |= SVt_PVMG;
10930 }
10931
10932 /* Intentionally not calling any local SET magic, as this isn't so much a
10933 set operation as merely an internal storage change. */
10934 if (flags & SV_COW_DROP_PV) SvOK_off(sv);
10935 else sv_setsv_flags(sv, temp, 0);
10936
10937 if ((const GV *)sv == PL_last_in_gv)
10938 PL_last_in_gv = NULL;
10939 else if ((const GV *)sv == PL_statgv)
10940 PL_statgv = NULL;
10941 }
10942
10943 /*
10944 =for apidoc sv_unref_flags
10945
10946 Unsets the RV status of the SV, and decrements the reference count of
10947 whatever was being referenced by the RV. This can almost be thought of
10948 as a reversal of C<newSVrv>. The C<cflags> argument can contain
10949 C<SV_IMMEDIATE_UNREF> to force the reference count to be decremented
10950 (otherwise the decrementing is conditional on the reference count being
10951 different from one or the reference being a readonly SV).
10952 See C<L</SvROK_off>>.
10953
10954 =for apidoc Amnh||SV_IMMEDIATE_UNREF
10955
10956 =cut
10957 */
10958
10959 void
10960 Perl_sv_unref_flags(pTHX_ SV *const ref, const U32 flags)
10961 {
10962 SV* const target = SvRV(ref);
10963
10964 PERL_ARGS_ASSERT_SV_UNREF_FLAGS;
10965
10966 if (SvWEAKREF(ref)) {
10967 sv_del_backref(target, ref);
10968 SvWEAKREF_off(ref);
10969 SvRV_set(ref, NULL);
10970 return;
10971 }
10972 SvRV_set(ref, NULL);
10973 SvROK_off(ref);
10974 /* You can't have a || SvREADONLY(target) here, as $a = $$a, where $a was
10975 assigned to as BEGIN {$a = \"Foo"} will fail. */
10976 if (SvREFCNT(target) != 1 || (flags & SV_IMMEDIATE_UNREF))
10977 SvREFCNT_dec_NN(target);
10978 else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */
10979 sv_2mortal(target); /* Schedule for freeing later */
10980 }
10981
10982 /*
10983 =for apidoc sv_untaint
10984
10985 Untaint an SV. Use C<SvTAINTED_off> instead.
10986
10987 =cut
10988 */
10989
10990 void
10991 Perl_sv_untaint(pTHX_ SV *const sv)
10992 {
10993 PERL_ARGS_ASSERT_SV_UNTAINT;
10994 PERL_UNUSED_CONTEXT;
10995
10996 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
10997 MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint);
10998 if (mg)
10999 mg->mg_len &= ~1;
11000 }
11001 }
11002
11003 /*
11004 =for apidoc sv_tainted
11005
11006 Test an SV for taintedness. Use C<SvTAINTED> instead.
11007
11008 =cut
11009 */
11010
11011 bool
11012 Perl_sv_tainted(pTHX_ SV *const sv)
11013 {
11014 PERL_ARGS_ASSERT_SV_TAINTED;
11015 PERL_UNUSED_CONTEXT;
11016
11017 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
11018 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint);
11019 if (mg && (mg->mg_len & 1) )
11020 return TRUE;
11021 }
11022 return FALSE;
11023 }
11024
11025 #if defined(MULTIPLICITY)
11026
11027 /* pTHX_ magic can't cope with varargs, so this is a no-context
11028 * version of the main function, (which may itself be aliased to us).
11029 * Don't access this version directly.
11030 */
11031
11032 void
11033 Perl_sv_setpvf_nocontext(SV *const sv, const char *const pat, ...)
11034 {
11035 dTHX;
11036 va_list args;
11037
11038 PERL_ARGS_ASSERT_SV_SETPVF_NOCONTEXT;
11039
11040 va_start(args, pat);
11041 sv_vsetpvf(sv, pat, &args);
11042 va_end(args);
11043 }
11044
11045 /* pTHX_ magic can't cope with varargs, so this is a no-context
11046 * version of the main function, (which may itself be aliased to us).
11047 * Don't access this version directly.
11048 */
11049
11050 void
11051 Perl_sv_setpvf_mg_nocontext(SV *const sv, const char *const pat, ...)
11052 {
11053 dTHX;
11054 va_list args;
11055
11056 PERL_ARGS_ASSERT_SV_SETPVF_MG_NOCONTEXT;
11057
11058 va_start(args, pat);
11059 sv_vsetpvf_mg(sv, pat, &args);
11060 va_end(args);
11061 }
11062 #endif
11063
11064 /*
11065 =for apidoc sv_setpvf
11066 =for apidoc_item sv_setpvf_mg
11067 =for apidoc_item sv_setpvf_mg_nocontext
11068 =for apidoc_item sv_setpvf_nocontext
11069
11070 These work like C<L</sv_catpvf>> but copy the text into the SV instead of
11071 appending it.
11072
11073 The differences between these are:
11074
11075 C<sv_setpvf_mg> and C<sv_setpvf_mg_nocontext> perform 'set' magic; C<sv_setpvf>
11076 and C<sv_setpvf_nocontext> skip all magic.
11077
11078 C<sv_setpvf_nocontext> and C<sv_setpvf_mg_nocontext> do not take a thread
11079 context (C<aTHX>) parameter, so are used in situations where the caller
11080 doesn't already have the thread context.
11081
11082 B<The UTF-8 flag is not changed by these functions.>
11083
11084 =cut
11085 */
11086
11087 void
11088 Perl_sv_setpvf(pTHX_ SV *const sv, const char *const pat, ...)
11089 {
11090 va_list args;
11091
11092 PERL_ARGS_ASSERT_SV_SETPVF;
11093
11094 va_start(args, pat);
11095 sv_vsetpvf(sv, pat, &args);
11096 va_end(args);
11097 }
11098
11099 /*
11100 =for apidoc sv_vsetpvf
11101 =for apidoc_item sv_vsetpvf_mg
11102
11103 These work like C<L</sv_vcatpvf>> but copy the text into the SV instead of
11104 appending it.
11105
11106 They differ only in that C<sv_vsetpvf_mg> performs 'set' magic;
11107 C<sv_vsetpvf> skips all magic.
11108
11109 They are usually used via their frontends, C<L</sv_setpvf>> and
11110 C<L</sv_setpvf_mg>>.
11111
11112 B<The UTF-8 flag is not changed by these functions.>
11113
11114 =cut
11115 */
11116
11117 void
11118 Perl_sv_vsetpvf(pTHX_ SV *const sv, const char *const pat, va_list *const args)
11119 {
11120 PERL_ARGS_ASSERT_SV_VSETPVF;
11121
11122 sv_vsetpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL);
11123 }
11124
11125 void
11126 Perl_sv_setpvf_mg(pTHX_ SV *const sv, const char *const pat, ...)
11127 {
11128 va_list args;
11129
11130 PERL_ARGS_ASSERT_SV_SETPVF_MG;
11131
11132 va_start(args, pat);
11133 sv_vsetpvf_mg(sv, pat, &args);
11134 va_end(args);
11135 }
11136
11137 void
11138 Perl_sv_vsetpvf_mg(pTHX_ SV *const sv, const char *const pat, va_list *const args)
11139 {
11140 PERL_ARGS_ASSERT_SV_VSETPVF_MG;
11141
11142 sv_vsetpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL);
11143 SvSETMAGIC(sv);
11144 }
11145
11146 #if defined(MULTIPLICITY)
11147
11148 /* pTHX_ magic can't cope with varargs, so this is a no-context
11149 * version of the main function, (which may itself be aliased to us).
11150 * Don't access this version directly.
11151 */
11152
11153 void
11154 Perl_sv_catpvf_nocontext(SV *const sv, const char *const pat, ...)
11155 {
11156 dTHX;
11157 va_list args;
11158
11159 PERL_ARGS_ASSERT_SV_CATPVF_NOCONTEXT;
11160
11161 va_start(args, pat);
11162 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC);
11163 va_end(args);
11164 }
11165
11166 /* pTHX_ magic can't cope with varargs, so this is a no-context
11167 * version of the main function, (which may itself be aliased to us).
11168 * Don't access this version directly.
11169 */
11170
11171 void
11172 Perl_sv_catpvf_mg_nocontext(SV *const sv, const char *const pat, ...)
11173 {
11174 dTHX;
11175 va_list args;
11176
11177 PERL_ARGS_ASSERT_SV_CATPVF_MG_NOCONTEXT;
11178
11179 va_start(args, pat);
11180 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC);
11181 SvSETMAGIC(sv);
11182 va_end(args);
11183 }
11184 #endif
11185
11186 /*
11187 =for apidoc sv_catpvf
11188 =for apidoc_item sv_catpvf_mg
11189 =for apidoc_item sv_catpvf_mg_nocontext
11190 =for apidoc_item sv_catpvf_nocontext
11191
11192 These process their arguments like C<sprintf>, and append the formatted
11193 output to an SV. As with C<sv_vcatpvfn>, argument reordering is not supporte
11194 when called with a non-null C-style variable argument list.
11195
11196 If the appended data contains "wide" characters
11197 (including, but not limited to, SVs with a UTF-8 PV formatted with C<%s>,
11198 and characters >255 formatted with C<%c>), the original SV might get
11199 upgraded to UTF-8.
11200
11201 If the original SV was UTF-8, the pattern should be
11202 valid UTF-8; if the original SV was bytes, the pattern should be too.
11203
11204 All perform 'get' magic, but only C<sv_catpvf_mg> and C<sv_catpvf_mg_nocontext>
11205 perform 'set' magic.
11206
11207 C<sv_catpvf_nocontext> and C<sv_catpvf_mg_nocontext> do not take a thread
11208 context (C<aTHX>) parameter, so are used in situations where the caller
11209 doesn't already have the thread context.
11210
11211 =cut
11212 */
11213
11214 void
11215 Perl_sv_catpvf(pTHX_ SV *const sv, const char *const pat, ...)
11216 {
11217 va_list args;
11218
11219 PERL_ARGS_ASSERT_SV_CATPVF;
11220
11221 va_start(args, pat);
11222 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC);
11223 va_end(args);
11224 }
11225
11226 /*
11227 =for apidoc sv_vcatpvf
11228 =for apidoc_item sv_vcatpvf_mg
11229
11230 These process their arguments like C<sv_vcatpvfn> called with a non-null
11231 C-style variable argument list, and append the formatted output to C<sv>.
11232
11233 They differ only in that C<sv_vcatpvf_mg> performs 'set' magic;
11234 C<sv_vcatpvf> skips 'set' magic.
11235
11236 Both perform 'get' magic.
11237
11238 They are usually accessed via their frontends C<L</sv_catpvf>> and
11239 C<L</sv_catpvf_mg>>.
11240
11241 =cut
11242 */
11243
11244 void
11245 Perl_sv_vcatpvf(pTHX_ SV *const sv, const char *const pat, va_list *const args)
11246 {
11247 PERL_ARGS_ASSERT_SV_VCATPVF;
11248
11249 sv_vcatpvfn_flags(sv, pat, strlen(pat), args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC);
11250 }
11251
11252 void
11253 Perl_sv_catpvf_mg(pTHX_ SV *const sv, const char *const pat, ...)
11254 {
11255 va_list args;
11256
11257 PERL_ARGS_ASSERT_SV_CATPVF_MG;
11258
11259 va_start(args, pat);
11260 sv_vcatpvfn_flags(sv, pat, strlen(pat), &args, NULL, 0, NULL, SV_GMAGIC|SV_SMAGIC);
11261 SvSETMAGIC(sv);
11262 va_end(args);
11263 }
11264
11265 void
11266 Perl_sv_vcatpvf_mg(pTHX_ SV *const sv, const char *const pat, va_list *const args)
11267 {
11268 PERL_ARGS_ASSERT_SV_VCATPVF_MG;
11269
11270 sv_vcatpvfn(sv, pat, strlen(pat), args, NULL, 0, NULL);
11271 SvSETMAGIC(sv);
11272 }
11273
11274 /*
11275 =for apidoc sv_vsetpvfn
11276
11277 Works like C<sv_vcatpvfn> but copies the text into the SV instead of
11278 appending it.
11279
11280 B<The UTF-8 flag is not changed by this function.>
11281
11282 Usually used via one of its frontends L</C<sv_vsetpvf>> and
11283 L</C<sv_vsetpvf_mg>>.
11284
11285 =cut
11286 */
11287
11288 void
11289 Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
11290 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted)
11291 {
11292 PERL_ARGS_ASSERT_SV_VSETPVFN;
11293
11294 SvPVCLEAR(sv);
11295 sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, 0);
11296 }
11297
11298
11299 /* simplified inline Perl_sv_catpvn_nomg() when you know the SV's SvPOK */
11300
11301 PERL_STATIC_INLINE void
11302 S_sv_catpvn_simple(pTHX_ SV *const sv, const char* const buf, const STRLEN len)
11303 {
11304 STRLEN const need = len + SvCUR(sv) + 1;
11305 char *end;
11306
11307 /* can't wrap as both len and SvCUR() are allocated in
11308 * memory and together can't consume all the address space
11309 */
11310 assert(need > len);
11311
11312 assert(SvPOK(sv));
11313 SvGROW(sv, need);
11314 end = SvEND(sv);
11315 Copy(buf, end, len, char);
11316 end += len;
11317 *end = '\0';
11318 SvCUR_set(sv, need - 1);
11319 }
11320
11321
11322 /*
11323 * Warn of missing argument to sprintf. The value used in place of such
11324 * arguments should be &PL_sv_no; an undefined value would yield
11325 * inappropriate "use of uninit" warnings [perl #71000].
11326 */
11327 STATIC void
11328 S_warn_vcatpvfn_missing_argument(pTHX) {
11329 if (ckWARN(WARN_MISSING)) {
11330 Perl_warner(aTHX_ packWARN(WARN_MISSING), "Missing argument in %s",
11331 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
11332 }
11333 }
11334
11335
11336 static void
11337 S_croak_overflow()
11338 {
11339 dTHX;
11340 Perl_croak(aTHX_ "Integer overflow in format string for %s",
11341 (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn"));
11342 }
11343
11344
11345 /* Given an int i from the next arg (if args is true) or an sv from an arg
11346 * (if args is false), try to extract a STRLEN-ranged value from the arg,
11347 * with overflow checking.
11348 * Sets *neg to true if the value was negative (untouched otherwise.
11349 * Returns the absolute value.
11350 * As an extra margin of safety, it croaks if the returned value would
11351 * exceed the maximum value of a STRLEN / 4.
11352 */
11353
11354 static STRLEN
11355 S_sprintf_arg_num_val(pTHX_ va_list *const args, int i, SV *sv, bool *neg)
11356 {
11357 IV iv;
11358
11359 if (args) {
11360 iv = i;
11361 goto do_iv;
11362 }
11363
11364 if (!sv)
11365 return 0;
11366
11367 SvGETMAGIC(sv);
11368
11369 if (UNLIKELY(SvIsUV(sv))) {
11370 UV uv = SvUV_nomg(sv);
11371 if (uv > IV_MAX)
11372 S_croak_overflow();
11373 iv = uv;
11374 }
11375 else {
11376 iv = SvIV_nomg(sv);
11377 do_iv:
11378 if (iv < 0) {
11379 if (iv < -IV_MAX)
11380 S_croak_overflow();
11381 iv = -iv;
11382 *neg = TRUE;
11383 }
11384 }
11385
11386 if (iv > (IV)(((STRLEN)~0) / 4))
11387 S_croak_overflow();
11388
11389 return (STRLEN)iv;
11390 }
11391
11392 /* Read in and return a number. Updates *pattern to point to the char
11393 * following the number. Expects the first char to 1..9.
11394 * Croaks if the number exceeds 1/4 of the maximum value of STRLEN.
11395 * This is a belt-and-braces safety measure to complement any
11396 * overflow/wrap checks done in the main body of sv_vcatpvfn_flags.
11397 * It means that e.g. on a 32-bit system the width/precision can't be more
11398 * than 1G, which seems reasonable.
11399 */
11400
11401 STATIC STRLEN
11402 S_expect_number(pTHX_ const char **const pattern)
11403 {
11404 STRLEN var;
11405
11406 PERL_ARGS_ASSERT_EXPECT_NUMBER;
11407
11408 assert(inRANGE(**pattern, '1', '9'));
11409
11410 var = *(*pattern)++ - '0';
11411 while (isDIGIT(**pattern)) {
11412 /* if var * 10 + 9 would exceed 1/4 max strlen, croak */
11413 if (var > ((((STRLEN)~0) / 4 - 9) / 10))
11414 S_croak_overflow();
11415 var = var * 10 + (*(*pattern)++ - '0');
11416 }
11417 return var;
11418 }
11419
11420 /* Implement a fast "%.0f": given a pointer to the end of a buffer (caller
11421 * ensures it's big enough), back fill it with the rounded integer part of
11422 * nv. Returns ptr to start of string, and sets *len to its length.
11423 * Returns NULL if not convertible.
11424 */
11425
11426 STATIC char *
11427 S_F0convert(NV nv, char *const endbuf, STRLEN *const len)
11428 {
11429 const int neg = nv < 0;
11430 UV uv;
11431
11432 PERL_ARGS_ASSERT_F0CONVERT;
11433
11434 assert(!Perl_isinfnan(nv));
11435 if (neg)
11436 nv = -nv;
11437 if (nv != 0.0 && nv < (NV) UV_MAX) {
11438 char *p = endbuf;
11439 uv = (UV)nv;
11440 if (uv != nv) {
11441 nv += 0.5;
11442 uv = (UV)nv;
11443 if (uv & 1 && uv == nv)
11444 uv--; /* Round to even */
11445 }
11446 do {
11447 const unsigned dig = uv % 10;
11448 *--p = '0' + dig;
11449 } while (uv /= 10);
11450 if (neg)
11451 *--p = '-';
11452 *len = endbuf - p;
11453 return p;
11454 }
11455 return NULL;
11456 }
11457
11458
11459 /* XXX maybe_tainted is never assigned to, so the doc above is lying. */
11460
11461 void
11462 Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
11463 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted)
11464 {
11465 PERL_ARGS_ASSERT_SV_VCATPVFN;
11466
11467 sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, SV_GMAGIC|SV_SMAGIC);
11468 }
11469
11470
11471 /* For the vcatpvfn code, we need a long double target in case
11472 * HAS_LONG_DOUBLE, even without USE_LONG_DOUBLE, so that we can printf
11473 * with long double formats, even without NV being long double. But we
11474 * call the target 'fv' instead of 'nv', since most of the time it is not
11475 * (most compilers these days recognize "long double", even if only as a
11476 * synonym for "double").
11477 */
11478 #if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE && \
11479 defined(PERL_PRIgldbl) && !defined(USE_QUADMATH)
11480 # define VCATPVFN_FV_GF PERL_PRIgldbl
11481 # if defined(__VMS) && defined(__ia64) && defined(__IEEE_FLOAT)
11482 /* Work around breakage in OTS$CVT_FLOAT_T_X */
11483 # define VCATPVFN_NV_TO_FV(nv,fv) \
11484 STMT_START { \
11485 double _dv = nv; \
11486 fv = Perl_isnan(_dv) ? LDBL_QNAN : _dv; \
11487 } STMT_END
11488 # else
11489 # define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv)
11490 # endif
11491 typedef long double vcatpvfn_long_double_t;
11492 #else
11493 # define VCATPVFN_FV_GF NVgf
11494 # define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv)
11495 typedef NV vcatpvfn_long_double_t;
11496 #endif
11497
11498 #ifdef LONGDOUBLE_DOUBLEDOUBLE
11499 /* The first double can be as large as 2**1023, or '1' x '0' x 1023.
11500 * The second double can be as small as 2**-1074, or '0' x 1073 . '1'.
11501 * The sum of them can be '1' . '0' x 2096 . '1', with implied radix point
11502 * after the first 1023 zero bits.
11503 *
11504 * XXX The 2098 is quite large (262.25 bytes) and therefore some sort
11505 * of dynamically growing buffer might be better, start at just 16 bytes
11506 * (for example) and grow only when necessary. Or maybe just by looking
11507 * at the exponents of the two doubles? */
11508 # define DOUBLEDOUBLE_MAXBITS 2098
11509 #endif
11510
11511 /* vhex will contain the values (0..15) of the hex digits ("nybbles"
11512 * of 4 bits); 1 for the implicit 1, and the mantissa bits, four bits
11513 * per xdigit. For the double-double case, this can be rather many.
11514 * The non-double-double-long-double overshoots since all bits of NV
11515 * are not mantissa bits, there are also exponent bits. */
11516 #ifdef LONGDOUBLE_DOUBLEDOUBLE
11517 # define VHEX_SIZE (3+DOUBLEDOUBLE_MAXBITS/4)
11518 #else
11519 # define VHEX_SIZE (1+(NVSIZE * 8)/4)
11520 #endif
11521
11522 /* If we do not have a known long double format, (including not using
11523 * long doubles, or long doubles being equal to doubles) then we will
11524 * fall back to the ldexp/frexp route, with which we can retrieve at
11525 * most as many bits as our widest unsigned integer type is. We try
11526 * to get a 64-bit unsigned integer even if we are not using a 64-bit UV.
11527 *
11528 * (If you want to test the case of UVSIZE == 4, NVSIZE == 8,
11529 * set the MANTISSATYPE to int and the MANTISSASIZE to 4.)
11530 */
11531 #if defined(HAS_QUAD) && defined(Uquad_t)
11532 # define MANTISSATYPE Uquad_t
11533 # define MANTISSASIZE 8
11534 #else
11535 # define MANTISSATYPE UV
11536 # define MANTISSASIZE UVSIZE
11537 #endif
11538
11539 #if defined(DOUBLE_LITTLE_ENDIAN) || defined(LONGDOUBLE_LITTLE_ENDIAN)
11540 # define HEXTRACT_LITTLE_ENDIAN
11541 #elif defined(DOUBLE_BIG_ENDIAN) || defined(LONGDOUBLE_BIG_ENDIAN)
11542 # define HEXTRACT_BIG_ENDIAN
11543 #else
11544 # define HEXTRACT_MIX_ENDIAN
11545 #endif
11546
11547 /* S_hextract() is a helper for S_format_hexfp, for extracting
11548 * the hexadecimal values (for %a/%A). The nv is the NV where the value
11549 * are being extracted from (either directly from the long double in-memory
11550 * presentation, or from the uquad computed via frexp+ldexp). frexp also
11551 * is used to update the exponent. The subnormal is set to true
11552 * for IEEE 754 subnormals/denormals (including the x86 80-bit format).
11553 * The vhex is the pointer to the beginning of the output buffer of VHEX_SIZE.
11554 *
11555 * The tricky part is that S_hextract() needs to be called twice:
11556 * the first time with vend as NULL, and the second time with vend as
11557 * the pointer returned by the first call. What happens is that on
11558 * the first round the output size is computed, and the intended
11559 * extraction sanity checked. On the second round the actual output
11560 * (the extraction of the hexadecimal values) takes place.
11561 * Sanity failures cause fatal failures during both rounds. */
11562 STATIC U8*
11563 S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal,
11564 U8* vhex, U8* vend)
11565 {
11566 U8* v = vhex;
11567 int ix;
11568 int ixmin = 0, ixmax = 0;
11569
11570 /* XXX Inf/NaN are not handled here, since it is
11571 * assumed they are to be output as "Inf" and "NaN". */
11572
11573 /* These macros are just to reduce typos, they have multiple
11574 * repetitions below, but usually only one (or sometimes two)
11575 * of them is really being used. */
11576 /* HEXTRACT_OUTPUT() extracts the high nybble first. */
11577 #define HEXTRACT_OUTPUT_HI(ix) (*v++ = nvp[ix] >> 4)
11578 #define HEXTRACT_OUTPUT_LO(ix) (*v++ = nvp[ix] & 0xF)
11579 #define HEXTRACT_OUTPUT(ix) \
11580 STMT_START { \
11581 HEXTRACT_OUTPUT_HI(ix); HEXTRACT_OUTPUT_LO(ix); \
11582 } STMT_END
11583 #define HEXTRACT_COUNT(ix, c) \
11584 STMT_START { \
11585 v += c; if (ix < ixmin) ixmin = ix; else if (ix > ixmax) ixmax = ix; \
11586 } STMT_END
11587 #define HEXTRACT_BYTE(ix) \
11588 STMT_START { \
11589 if (vend) HEXTRACT_OUTPUT(ix); else HEXTRACT_COUNT(ix, 2); \
11590 } STMT_END
11591 #define HEXTRACT_LO_NYBBLE(ix) \
11592 STMT_START { \
11593 if (vend) HEXTRACT_OUTPUT_LO(ix); else HEXTRACT_COUNT(ix, 1); \
11594 } STMT_END
11595 /* HEXTRACT_TOP_NYBBLE is just convenience disguise,
11596 * to make it look less odd when the top bits of a NV
11597 * are extracted using HEXTRACT_LO_NYBBLE: the highest
11598 * order bits can be in the "low nybble" of a byte. */
11599 #define HEXTRACT_TOP_NYBBLE(ix) HEXTRACT_LO_NYBBLE(ix)
11600 #define HEXTRACT_BYTES_LE(a, b) \
11601 for (ix = a; ix >= b; ix--) { HEXTRACT_BYTE(ix); }
11602 #define HEXTRACT_BYTES_BE(a, b) \
11603 for (ix = a; ix <= b; ix++) { HEXTRACT_BYTE(ix); }
11604 #define HEXTRACT_GET_SUBNORMAL(nv) *subnormal = Perl_fp_class_denorm(nv)
11605 #define HEXTRACT_IMPLICIT_BIT(nv) \
11606 STMT_START { \
11607 if (!*subnormal) { \
11608 if (vend) *v++ = ((nv) == 0.0) ? 0 : 1; else v++; \
11609 } \
11610 } STMT_END
11611
11612 /* Most formats do. Those which don't should undef this.
11613 *
11614 * But also note that IEEE 754 subnormals do not have it, or,
11615 * expressed alternatively, their implicit bit is zero. */
11616 #define HEXTRACT_HAS_IMPLICIT_BIT
11617
11618 /* Many formats do. Those which don't should undef this. */
11619 #define HEXTRACT_HAS_TOP_NYBBLE
11620
11621 /* HEXTRACTSIZE is the maximum number of xdigits. */
11622 #if defined(USE_LONG_DOUBLE) && defined(LONGDOUBLE_DOUBLEDOUBLE)
11623 # define HEXTRACTSIZE (2+DOUBLEDOUBLE_MAXBITS/4)
11624 #else
11625 # define HEXTRACTSIZE 2 * NVSIZE
11626 #endif
11627
11628 const U8* vmaxend = vhex + HEXTRACTSIZE;
11629
11630 assert(HEXTRACTSIZE <= VHEX_SIZE);
11631
11632 PERL_UNUSED_VAR(ix); /* might happen */
11633 (void)Perl_frexp(PERL_ABS(nv), exponent);
11634 *subnormal = FALSE;
11635 if (vend && (vend <= vhex || vend > vmaxend)) {
11636 /* diag_listed_as: Hexadecimal float: internal error (%s) */
11637 Perl_croak(aTHX_ "Hexadecimal float: internal error (entry)");
11638 }
11639 {
11640 /* First check if using long doubles. */
11641 #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE)
11642 # if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN
11643 /* Used in e.g. VMS and HP-UX IA-64, e.g. -0.1L:
11644 * 9a 99 99 99 99 99 99 99 99 99 99 99 99 99 fb bf */
11645 /* The bytes 13..0 are the mantissa/fraction,
11646 * the 15,14 are the sign+exponent. */
11647 const U8* nvp = (const U8*)(&nv);
11648 HEXTRACT_GET_SUBNORMAL(nv);
11649 HEXTRACT_IMPLICIT_BIT(nv);
11650 # undef HEXTRACT_HAS_TOP_NYBBLE
11651 HEXTRACT_BYTES_LE(13, 0);
11652 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN
11653 /* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L:
11654 * bf fb 99 99 99 99 99 99 99 99 99 99 99 99 99 9a */
11655 /* The bytes 2..15 are the mantissa/fraction,
11656 * the 0,1 are the sign+exponent. */
11657 const U8* nvp = (const U8*)(&nv);
11658 HEXTRACT_GET_SUBNORMAL(nv);
11659 HEXTRACT_IMPLICIT_BIT(nv);
11660 # undef HEXTRACT_HAS_TOP_NYBBLE
11661 HEXTRACT_BYTES_BE(2, 15);
11662 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
11663 /* x86 80-bit "extended precision", 64 bits of mantissa / fraction /
11664 * significand, 15 bits of exponent, 1 bit of sign. No implicit bit.
11665 * NVSIZE can be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux
11666 * and OS X), meaning that 2 or 6 bytes are empty padding. */
11667 /* The bytes 0..1 are the sign+exponent,
11668 * the bytes 2..9 are the mantissa/fraction. */
11669 const U8* nvp = (const U8*)(&nv);
11670 # undef HEXTRACT_HAS_IMPLICIT_BIT
11671 # undef HEXTRACT_HAS_TOP_NYBBLE
11672 HEXTRACT_GET_SUBNORMAL(nv);
11673 HEXTRACT_BYTES_LE(7, 0);
11674 # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN
11675 /* Does this format ever happen? (Wikipedia says the Motorola
11676 * 6888x math coprocessors used format _like_ this but padded
11677 * to 96 bits with 16 unused bits between the exponent and the
11678 * mantissa.) */
11679 const U8* nvp = (const U8*)(&nv);
11680 # undef HEXTRACT_HAS_IMPLICIT_BIT
11681 # undef HEXTRACT_HAS_TOP_NYBBLE
11682 HEXTRACT_GET_SUBNORMAL(nv);
11683 HEXTRACT_BYTES_BE(0, 7);
11684 # else
11685 # define HEXTRACT_FALLBACK
11686 /* Double-double format: two doubles next to each other.
11687 * The first double is the high-order one, exactly like
11688 * it would be for a "lone" double. The second double
11689 * is shifted down using the exponent so that that there
11690 * are no common bits. The tricky part is that the value
11691 * of the double-double is the SUM of the two doubles and
11692 * the second one can be also NEGATIVE.
11693 *
11694 * Because of this tricky construction the bytewise extraction we
11695 * use for the other long double formats doesn't work, we must
11696 * extract the values bit by bit.
11697 *
11698 * The little-endian double-double is used .. somewhere?
11699 *
11700 * The big endian double-double is used in e.g. PPC/Power (AIX)
11701 * and MIPS (SGI).
11702 *
11703 * The mantissa bits are in two separate stretches, e.g. for -0.1L:
11704 * 9a 99 99 99 99 99 59 bc 9a 99 99 99 99 99 b9 3f (LE)
11705 * 3f b9 99 99 99 99 99 9a bc 59 99 99 99 99 99 9a (BE)
11706 */
11707 # endif
11708 #else /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) */
11709 /* Using normal doubles, not long doubles.
11710 *
11711 * We generate 4-bit xdigits (nybble/nibble) instead of 8-bit
11712 * bytes, since we might need to handle printf precision, and
11713 * also need to insert the radix. */
11714 # if NVSIZE == 8
11715 # ifdef HEXTRACT_LITTLE_ENDIAN
11716 /* 0 1 2 3 4 5 6 7 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */
11717 const U8* nvp = (const U8*)(&nv);
11718 HEXTRACT_GET_SUBNORMAL(nv);
11719 HEXTRACT_IMPLICIT_BIT(nv);
11720 HEXTRACT_TOP_NYBBLE(6);
11721 HEXTRACT_BYTES_LE(5, 0);
11722 # elif defined(HEXTRACT_BIG_ENDIAN)
11723 /* 7 6 5 4 3 2 1 0 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */
11724 const U8* nvp = (const U8*)(&nv);
11725 HEXTRACT_GET_SUBNORMAL(nv);
11726 HEXTRACT_IMPLICIT_BIT(nv);
11727 HEXTRACT_TOP_NYBBLE(1);
11728 HEXTRACT_BYTES_BE(2, 7);
11729 # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_LE_BE
11730 /* 4 5 6 7 0 1 2 3 (MSB = 7, LSB = 0, 6:7 = nybble:exponent:sign) */
11731 const U8* nvp = (const U8*)(&nv);
11732 HEXTRACT_GET_SUBNORMAL(nv);
11733 HEXTRACT_IMPLICIT_BIT(nv);
11734 HEXTRACT_TOP_NYBBLE(2); /* 6 */
11735 HEXTRACT_BYTE(1); /* 5 */
11736 HEXTRACT_BYTE(0); /* 4 */
11737 HEXTRACT_BYTE(7); /* 3 */
11738 HEXTRACT_BYTE(6); /* 2 */
11739 HEXTRACT_BYTE(5); /* 1 */
11740 HEXTRACT_BYTE(4); /* 0 */
11741 # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_BE_LE
11742 /* 3 2 1 0 7 6 5 4 (MSB = 7, LSB = 0, 7:6 = sign:exponent:nybble) */
11743 const U8* nvp = (const U8*)(&nv);
11744 HEXTRACT_GET_SUBNORMAL(nv);
11745 HEXTRACT_IMPLICIT_BIT(nv);
11746 HEXTRACT_TOP_NYBBLE(5); /* 6 */
11747 HEXTRACT_BYTE(6); /* 5 */
11748 HEXTRACT_BYTE(7); /* 4 */
11749 HEXTRACT_BYTE(0); /* 3 */
11750 HEXTRACT_BYTE(1); /* 2 */
11751 HEXTRACT_BYTE(2); /* 1 */
11752 HEXTRACT_BYTE(3); /* 0 */
11753 # else
11754 # define HEXTRACT_FALLBACK
11755 # endif
11756 # else
11757 # define HEXTRACT_FALLBACK
11758 # endif
11759 #endif /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) #else */
11760
11761 #ifdef HEXTRACT_FALLBACK
11762 HEXTRACT_GET_SUBNORMAL(nv);
11763 # undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */
11764 /* The fallback is used for the double-double format, and
11765 * for unknown long double formats, and for unknown double
11766 * formats, or in general unknown NV formats. */
11767 if (nv == (NV)0.0) {
11768 if (vend)
11769 *v++ = 0;
11770 else
11771 v++;
11772 *exponent = 0;
11773 }
11774 else {
11775 NV d = nv < 0 ? -nv : nv;
11776 NV e = (NV)1.0;
11777 U8 ha = 0x0; /* hexvalue accumulator */
11778 U8 hd = 0x8; /* hexvalue digit */
11779
11780 /* Shift d and e (and update exponent) so that e <= d < 2*e,
11781 * this is essentially manual frexp(). Multiplying by 0.5 and
11782 * doubling should be lossless in binary floating point. */
11783
11784 *exponent = 1;
11785
11786 while (e > d) {
11787 e *= (NV)0.5;
11788 (*exponent)--;
11789 }
11790 /* Now d >= e */
11791
11792 while (d >= e + e) {
11793 e += e;
11794 (*exponent)++;
11795 }
11796 /* Now e <= d < 2*e */
11797
11798 /* First extract the leading hexdigit (the implicit bit). */
11799 if (d >= e) {
11800 d -= e;
11801 if (vend)
11802 *v++ = 1;
11803 else
11804 v++;
11805 }
11806 else {
11807 if (vend)
11808 *v++ = 0;
11809 else
11810 v++;
11811 }
11812 e *= (NV)0.5;
11813
11814 /* Then extract the remaining hexdigits. */
11815 while (d > (NV)0.0) {
11816 if (d >= e) {
11817 ha |= hd;
11818 d -= e;
11819 }
11820 if (hd == 1) {
11821 /* Output or count in groups of four bits,
11822 * that is, when the hexdigit is down to one. */
11823 if (vend)
11824 *v++ = ha;
11825 else
11826 v++;
11827 /* Reset the hexvalue. */
11828 ha = 0x0;
11829 hd = 0x8;
11830 }
11831 else
11832 hd >>= 1;
11833 e *= (NV)0.5;
11834 }
11835
11836 /* Flush possible pending hexvalue. */
11837 if (ha) {
11838 if (vend)
11839 *v++ = ha;
11840 else
11841 v++;
11842 }
11843 }
11844 #endif
11845 }
11846 /* Croak for various reasons: if the output pointer escaped the
11847 * output buffer, if the extraction index escaped the extraction
11848 * buffer, or if the ending output pointer didn't match the
11849 * previously computed value. */
11850 if (v <= vhex || v - vhex >= VHEX_SIZE ||
11851 /* For double-double the ixmin and ixmax stay at zero,
11852 * which is convenient since the HEXTRACTSIZE is tricky
11853 * for double-double. */
11854 ixmin < 0 || ixmax >= NVSIZE ||
11855 (vend && v != vend)) {
11856 /* diag_listed_as: Hexadecimal float: internal error (%s) */
11857 Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)");
11858 }
11859 return v;
11860 }
11861
11862
11863 /* S_format_hexfp(): helper function for Perl_sv_vcatpvfn_flags().
11864 *
11865 * Processes the %a/%A hexadecimal floating-point format, since the
11866 * built-in snprintf()s which are used for most of the f/p formats, don't
11867 * universally handle %a/%A.
11868 * Populates buf of length bufsize, and returns the length of the created
11869 * string.
11870 * The rest of the args have the same meaning as the local vars of the
11871 * same name within Perl_sv_vcatpvfn_flags().
11872 *
11873 * The caller's determination of IN_LC(LC_NUMERIC), passed as in_lc_numeric,
11874 * is used to ensure we do the right thing when we need to access the locale's
11875 * numeric radix.
11876 *
11877 * It requires the caller to make buf large enough.
11878 */
11879
11880 static STRLEN
11881 S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
11882 const NV nv, const vcatpvfn_long_double_t fv,
11883 bool has_precis, STRLEN precis, STRLEN width,
11884 bool alt, char plus, bool left, bool fill, bool in_lc_numeric)
11885 {
11886 /* Hexadecimal floating point. */
11887 char* p = buf;
11888 U8 vhex[VHEX_SIZE];
11889 U8* v = vhex; /* working pointer to vhex */
11890 U8* vend; /* pointer to one beyond last digit of vhex */
11891 U8* vfnz = NULL; /* first non-zero */
11892 U8* vlnz = NULL; /* last non-zero */
11893 U8* v0 = NULL; /* first output */
11894 const bool lower = (c == 'a');
11895 /* At output the values of vhex (up to vend) will
11896 * be mapped through the xdig to get the actual
11897 * human-readable xdigits. */
11898 const char* xdig = PL_hexdigit;
11899 STRLEN zerotail = 0; /* how many extra zeros to append */
11900 int exponent = 0; /* exponent of the floating point input */
11901 bool hexradix = FALSE; /* should we output the radix */
11902 bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
11903 bool negative = FALSE;
11904 STRLEN elen;
11905
11906 /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf".
11907 *
11908 * For example with denormals, (assuming the vanilla
11909 * 64-bit double): the exponent is zero. 1xp-1074 is
11910 * the smallest denormal and the smallest double, it
11911 * could be output also as 0x0.0000000000001p-1022 to
11912 * match its internal structure. */
11913
11914 vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL);
11915 S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend);
11916
11917 #if NVSIZE > DOUBLESIZE
11918 # ifdef HEXTRACT_HAS_IMPLICIT_BIT
11919 /* In this case there is an implicit bit,
11920 * and therefore the exponent is shifted by one. */
11921 exponent--;
11922 # elif defined(NV_X86_80_BIT)
11923 if (subnormal) {
11924 /* The subnormals of the x86-80 have a base exponent of -16382,
11925 * (while the physical exponent bits are zero) but the frexp()
11926 * returned the scientific-style floating exponent. We want
11927 * to map the last one as:
11928 * -16831..-16384 -> -16382 (the last normal is 0x1p-16382)
11929 * -16835..-16388 -> -16384
11930 * since we want to keep the first hexdigit
11931 * as one of the [8421]. */
11932 exponent = -4 * ( (exponent + 1) / -4) - 2;
11933 } else {
11934 exponent -= 4;
11935 }
11936 /* TBD: other non-implicit-bit platforms than the x86-80. */
11937 # endif
11938 #endif
11939
11940 negative = fv < 0 || Perl_signbit(nv);
11941 if (negative)
11942 *p++ = '-';
11943 else if (plus)
11944 *p++ = plus;
11945 *p++ = '0';
11946 if (lower) {
11947 *p++ = 'x';
11948 }
11949 else {
11950 *p++ = 'X';
11951 xdig += 16; /* Use uppercase hex. */
11952 }
11953
11954 /* Find the first non-zero xdigit. */
11955 for (v = vhex; v < vend; v++) {
11956 if (*v) {
11957 vfnz = v;
11958 break;
11959 }
11960 }
11961
11962 if (vfnz) {
11963 /* Find the last non-zero xdigit. */
11964 for (v = vend - 1; v >= vhex; v--) {
11965 if (*v) {
11966 vlnz = v;
11967 break;
11968 }
11969 }
11970
11971 #if NVSIZE == DOUBLESIZE
11972 if (fv != 0.0)
11973 exponent--;
11974 #endif
11975
11976 if (subnormal) {
11977 #ifndef NV_X86_80_BIT
11978 if (vfnz[0] > 1) {
11979 /* IEEE 754 subnormals (but not the x86 80-bit):
11980 * we want "normalize" the subnormal,
11981 * so we need to right shift the hex nybbles
11982 * so that the output of the subnormal starts
11983 * from the first true bit. (Another, equally
11984 * valid, policy would be to dump the subnormal
11985 * nybbles as-is, to display the "physical" layout.) */
11986 int i, n;
11987 U8 *vshr;
11988 /* Find the ceil(log2(v[0])) of
11989 * the top non-zero nybble. */
11990 for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { }
11991 assert(n < 4);
11992 assert(vlnz);
11993 vlnz[1] = 0;
11994 for (vshr = vlnz; vshr >= vfnz; vshr--) {
11995 vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n);
11996 vshr[0] >>= n;
11997 }
11998 if (vlnz[1]) {
11999 vlnz++;
12000 }
12001 }
12002 #endif
12003 v0 = vfnz;
12004 } else {
12005 v0 = vhex;
12006 }
12007
12008 if (has_precis) {
12009 U8* ve = (subnormal ? vlnz + 1 : vend);
12010 SSize_t vn = ve - v0;
12011 assert(vn >= 1);
12012 if (precis < (Size_t)(vn - 1)) {
12013 bool overflow = FALSE;
12014 if (v0[precis + 1] < 0x8) {
12015 /* Round down, nothing to do. */
12016 } else if (v0[precis + 1] > 0x8) {
12017 /* Round up. */
12018 v0[precis]++;
12019 overflow = v0[precis] > 0xF;
12020 v0[precis] &= 0xF;
12021 } else { /* v0[precis] == 0x8 */
12022 /* Half-point: round towards the one
12023 * with the even least-significant digit:
12024 * 08 -> 0 88 -> 8
12025 * 18 -> 2 98 -> a
12026 * 28 -> 2 a8 -> a
12027 * 38 -> 4 b8 -> c
12028 * 48 -> 4 c8 -> c
12029 * 58 -> 6 d8 -> e
12030 * 68 -> 6 e8 -> e
12031 * 78 -> 8 f8 -> 10 */
12032 if ((v0[precis] & 0x1)) {
12033 v0[precis]++;
12034 }
12035 overflow = v0[precis] > 0xF;
12036 v0[precis] &= 0xF;
12037 }
12038
12039 if (overflow) {
12040 for (v = v0 + precis - 1; v >= v0; v--) {
12041 (*v)++;
12042 overflow = *v > 0xF;
12043 (*v) &= 0xF;
12044 if (!overflow) {
12045 break;
12046 }
12047 }
12048 if (v == v0 - 1 && overflow) {
12049 /* If the overflow goes all the
12050 * way to the front, we need to
12051 * insert 0x1 in front, and adjust
12052 * the exponent. */
12053 Move(v0, v0 + 1, vn - 1, char);
12054 *v0 = 0x1;
12055 exponent += 4;
12056 }
12057 }
12058
12059 /* The new effective "last non zero". */
12060 vlnz = v0 + precis;
12061 }
12062 else {
12063 zerotail =
12064 subnormal ? precis - vn + 1 :
12065 precis - (vlnz - vhex);
12066 }
12067 }
12068
12069 v = v0;
12070 *p++ = xdig[*v++];
12071
12072 /* If there are non-zero xdigits, the radix
12073 * is output after the first one. */
12074 if (vfnz < vlnz) {
12075 hexradix = TRUE;
12076 }
12077 }
12078 else {
12079 *p++ = '0';
12080 exponent = 0;
12081 zerotail = has_precis ? precis : 0;
12082 }
12083
12084 /* The radix is always output if precis, or if alt. */
12085 if ((has_precis && precis > 0) || alt) {
12086 hexradix = TRUE;
12087 }
12088
12089 if (hexradix) {
12090 #ifndef USE_LOCALE_NUMERIC
12091 PERL_UNUSED_ARG(in_lc_numeric);
12092
12093 *p++ = '.';
12094 #else
12095 if (in_lc_numeric) {
12096 STRLEN n;
12097 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(TRUE, {
12098 const char* r = SvPV(PL_numeric_radix_sv, n);
12099 Copy(r, p, n, char);
12100 });
12101 p += n;
12102 }
12103 else {
12104 *p++ = '.';
12105 }
12106 #endif
12107 }
12108
12109 if (vlnz) {
12110 while (v <= vlnz)
12111 *p++ = xdig[*v++];
12112 }
12113
12114 if (zerotail > 0) {
12115 while (zerotail--) {
12116 *p++ = '0';
12117 }
12118 }
12119
12120 elen = p - buf;
12121
12122 /* sanity checks */
12123 if (elen >= bufsize || width >= bufsize)
12124 /* diag_listed_as: Hexadecimal float: internal error (%s) */
12125 Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)");
12126
12127 elen += my_snprintf(p, bufsize - elen,
12128 "%c%+d", lower ? 'p' : 'P',
12129 exponent);
12130
12131 if (elen < width) {
12132 STRLEN gap = (STRLEN)(width - elen);
12133 if (left) {
12134 /* Pad the back with spaces. */
12135 memset(buf + elen, ' ', gap);
12136 }
12137 else if (fill) {
12138 /* Insert the zeros after the "0x" and the
12139 * the potential sign, but before the digits,
12140 * otherwise we end up with "0000xH.HHH...",
12141 * when we want "0x000H.HHH..." */
12142 STRLEN nzero = gap;
12143 char* zerox = buf + 2;
12144 STRLEN nmove = elen - 2;
12145 if (negative || plus) {
12146 zerox++;
12147 nmove--;
12148 }
12149 Move(zerox, zerox + nzero, nmove, char);
12150 memset(zerox, fill ? '0' : ' ', nzero);
12151 }
12152 else {
12153 /* Move it to the right. */
12154 Move(buf, buf + gap,
12155 elen, char);
12156 /* Pad the front with spaces. */
12157 memset(buf, ' ', gap);
12158 }
12159 elen = width;
12160 }
12161 return elen;
12162 }
12163
12164 /*
12165 =for apidoc sv_vcatpvfn
12166 =for apidoc_item sv_vcatpvfn_flags
12167
12168 These process their arguments like C<L<vsprintf(3)>> and append the formatted output
12169 to an SV. They use an array of SVs if the C-style variable argument list is
12170 missing (C<NULL>). Argument reordering (using format specifiers like C<%2$d> or
12171 C<%*2$d>) is supported only when using an array of SVs; using a C-style
12172 C<va_list> argument list with a format string that uses argument reordering
12173 will yield an exception.
12174
12175 When running with taint checks enabled, they indicate via C<maybe_tainted> if
12176 results are untrustworthy (often due to the use of locales).
12177
12178 They assume that C<pat> has the same utf8-ness as C<sv>. It's the caller's
12179 responsibility to ensure that this is so.
12180
12181 They differ in that C<sv_vcatpvfn_flags> has a C<flags> parameter in which you
12182 can set or clear the C<SV_GMAGIC> and/or S<SV_SMAGIC> flags, to specify which
12183 magic to handle or not handle; whereas plain C<sv_vcatpvfn> always specifies
12184 both 'get' and 'set' magic.
12185
12186 They are usually used via one of the frontends L</C<sv_vcatpvf>> and
12187 L</C<sv_vcatpvf_mg>>.
12188
12189 =cut
12190 */
12191
12192
12193 void
12194 Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
12195 va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted,
12196 const U32 flags)
12197 {
12198 const char *fmtstart; /* character following the current '%' */
12199 const char *q; /* current position within format */
12200 const char *patend;
12201 STRLEN origlen;
12202 Size_t svix = 0;
12203 static const char nullstr[] = "(null)";
12204 bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */
12205 const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */
12206 /* Times 4: a decimal digit takes more than 3 binary digits.
12207 * NV_DIG: mantissa takes that many decimal digits.
12208 * Plus 32: Playing safe. */
12209 char ebuf[IV_DIG * 4 + NV_DIG + 32];
12210 bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */
12211 #ifdef USE_LOCALE_NUMERIC
12212 bool have_in_lc_numeric = FALSE;
12213 #endif
12214 /* we never change this unless USE_LOCALE_NUMERIC */
12215 bool in_lc_numeric = FALSE;
12216 SV *tmp_sv = NULL;
12217
12218 PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
12219 PERL_UNUSED_ARG(maybe_tainted);
12220
12221 if (flags & SV_GMAGIC)
12222 SvGETMAGIC(sv);
12223
12224 /* no matter what, this is a string now */
12225 (void)SvPV_force_nomg(sv, origlen);
12226
12227 /* the code that scans for flags etc following a % relies on
12228 * a '\0' being present to avoid falling off the end. Ideally that
12229 * should be fixed */
12230 assert(pat[patlen] == '\0');
12231
12232
12233 /* Special-case "", "%s", "%-p" (SVf - see below) and "%.0f".
12234 * In each case, if there isn't the correct number of args, instead
12235 * fall through to the main code to handle the issuing of any
12236 * warnings etc.
12237 */
12238
12239 if (patlen == 0 && (args || sv_count == 0))
12240 return;
12241
12242 if (patlen <= 4 && pat[0] == '%' && (args || sv_count == 1)) {
12243
12244 /* "%s" */
12245 if (patlen == 2 && pat[1] == 's') {
12246 if (args) {
12247 const char * const s = va_arg(*args, char*);
12248 sv_catpv_nomg(sv, s ? s : nullstr);
12249 }
12250 else {
12251 /* we want get magic on the source but not the target.
12252 * sv_catsv can't do that, though */
12253 SvGETMAGIC(*svargs);
12254 sv_catsv_nomg(sv, *svargs);
12255 }
12256 return;
12257 }
12258
12259 /* "%-p" */
12260 if (args) {
12261 if (patlen == 3 && pat[1] == '-' && pat[2] == 'p') {
12262 SV *asv = MUTABLE_SV(va_arg(*args, void*));
12263 sv_catsv_nomg(sv, asv);
12264 return;
12265 }
12266 }
12267 #if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH)
12268 /* special-case "%.0f" */
12269 else if ( patlen == 4
12270 && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f')
12271 {
12272 const NV nv = SvNV(*svargs);
12273 if (LIKELY(!Perl_isinfnan(nv))) {
12274 STRLEN l;
12275 char *p;
12276
12277 if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
12278 sv_catpvn_nomg(sv, p, l);
12279 return;
12280 }
12281 }
12282 }
12283 #endif /* !USE_LONG_DOUBLE */
12284 }
12285
12286
12287 patend = (char*)pat + patlen;
12288 for (fmtstart = pat; fmtstart < patend; fmtstart = q) {
12289 char intsize = 0; /* size qualifier in "%hi..." etc */
12290 bool alt = FALSE; /* has "%#..." */
12291 bool left = FALSE; /* has "%-..." */
12292 bool fill = FALSE; /* has "%0..." */
12293 char plus = 0; /* has "%+..." */
12294 STRLEN width = 0; /* value of "%NNN..." */
12295 bool has_precis = FALSE; /* has "%.NNN..." */
12296 STRLEN precis = 0; /* value of "%.NNN..." */
12297 int base = 0; /* base to print in, e.g. 8 for %o */
12298 UV uv = 0; /* the value to print of int-ish args */
12299
12300 bool vectorize = FALSE; /* has "%v..." */
12301 bool vec_utf8 = FALSE; /* SvUTF8(vec arg) */
12302 const U8 *vecstr = NULL; /* SvPVX(vec arg) */
12303 STRLEN veclen = 0; /* SvCUR(vec arg) */
12304 const char *dotstr = NULL; /* separator string for %v */
12305 STRLEN dotstrlen; /* length of separator string for %v */
12306
12307 Size_t efix = 0; /* explicit format parameter index */
12308 const Size_t osvix = svix; /* original index in case of bad fmt */
12309
12310 SV *argsv = NULL;
12311 bool is_utf8 = FALSE; /* is this item utf8? */
12312 bool arg_missing = FALSE; /* give "Missing argument" warning */
12313 char esignbuf[4]; /* holds sign prefix, e.g. "-0x" */
12314 STRLEN esignlen = 0; /* length of e.g. "-0x" */
12315 STRLEN zeros = 0; /* how many '0' to prepend */
12316
12317 const char *eptr = NULL; /* the address of the element string */
12318 STRLEN elen = 0; /* the length of the element string */
12319
12320 char c; /* the actual format ('d', s' etc) */
12321
12322 bool escape_it = FALSE; /* if this is a string should we quote and escape it? */
12323
12324
12325 /* echo everything up to the next format specification */
12326 for (q = fmtstart; q < patend && *q != '%'; ++q)
12327 {};
12328
12329 if (q > fmtstart) {
12330 if (has_utf8 && !pat_utf8) {
12331 /* upgrade and copy the bytes of fmtstart..q-1 to utf8 on
12332 * the fly */
12333 const char *p;
12334 char *dst;
12335 STRLEN need = SvCUR(sv) + (q - fmtstart) + 1;
12336
12337 for (p = fmtstart; p < q; p++)
12338 if (!NATIVE_BYTE_IS_INVARIANT(*p))
12339 need++;
12340 SvGROW(sv, need);
12341
12342 dst = SvEND(sv);
12343 for (p = fmtstart; p < q; p++)
12344 append_utf8_from_native_byte((U8)*p, (U8**)&dst);
12345 *dst = '\0';
12346 SvCUR_set(sv, need - 1);
12347 }
12348 else
12349 S_sv_catpvn_simple(aTHX_ sv, fmtstart, q - fmtstart);
12350 }
12351 if (q++ >= patend)
12352 break;
12353
12354 fmtstart = q; /* fmtstart is char following the '%' */
12355
12356 /*
12357 We allow format specification elements in this order:
12358 \d+\$ explicit format parameter index
12359 [-+ 0#]+ flags
12360 v|\*(\d+\$)?v vector with optional (optionally specified) arg
12361 0 flag (as above): repeated to allow "v02"
12362 \d+|\*(\d+\$)? width using optional (optionally specified) arg
12363 \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg
12364 [hlqLV] size
12365 [%bcdefginopsuxDFOUX] format (mandatory)
12366 */
12367
12368 if (inRANGE(*q, '1', '9')) {
12369 width = expect_number(&q);
12370 if (*q == '$') {
12371 if (args)
12372 Perl_croak_nocontext(
12373 "Cannot yet reorder sv_vcatpvfn() arguments from va_list");
12374 ++q;
12375 efix = (Size_t)width;
12376 width = 0;
12377 no_redundant_warning = TRUE;
12378 } else {
12379 goto gotwidth;
12380 }
12381 }
12382
12383 /* FLAGS */
12384
12385 while (*q) {
12386 switch (*q) {
12387 case ' ':
12388 case '+':
12389 if (plus == '+' && *q == ' ') /* '+' over ' ' */
12390 q++;
12391 else
12392 plus = *q++;
12393 continue;
12394
12395 case '-':
12396 left = TRUE;
12397 q++;
12398 continue;
12399
12400 case '0':
12401 fill = TRUE;
12402 q++;
12403 continue;
12404
12405 case '#':
12406 alt = TRUE;
12407 q++;
12408 continue;
12409
12410 default:
12411 break;
12412 }
12413 break;
12414 }
12415
12416 /* at this point we can expect one of:
12417 *
12418 * 123 an explicit width
12419 * * width taken from next arg
12420 * *12$ width taken from 12th arg
12421 * or no width
12422 *
12423 * But any width specification may be preceded by a v, in one of its
12424 * forms:
12425 * v
12426 * *v
12427 * *12$v
12428 * So an asterisk may be either a width specifier or a vector
12429 * separator arg specifier, and we don't know which initially
12430 */
12431
12432 tryasterisk:
12433 if (*q == '*') {
12434 STRLEN ix; /* explicit width/vector separator index */
12435 q++;
12436 if (inRANGE(*q, '1', '9')) {
12437 ix = expect_number(&q);
12438 if (*q++ == '$') {
12439 if (args)
12440 Perl_croak_nocontext(
12441 "Cannot yet reorder sv_vcatpvfn() arguments from va_list");
12442 no_redundant_warning = TRUE;
12443 } else
12444 goto unknown;
12445 }
12446 else
12447 ix = 0;
12448
12449 if (*q == 'v') {
12450 SV *vecsv;
12451 /* The asterisk was for *v, *NNN$v: vectorizing, but not
12452 * with the default "." */
12453 q++;
12454 if (vectorize)
12455 goto unknown;
12456 if (args)
12457 vecsv = va_arg(*args, SV*);
12458 else {
12459 ix = ix ? ix - 1 : svix++;
12460 vecsv = ix < sv_count ? svargs[ix]
12461 : (arg_missing = TRUE, &PL_sv_no);
12462 }
12463 dotstr = SvPV_const(vecsv, dotstrlen);
12464 /* Keep the DO_UTF8 test *after* the SvPV call, else things go
12465 bad with tied or overloaded values that return UTF8. */
12466 if (DO_UTF8(vecsv))
12467 is_utf8 = TRUE;
12468 else if (has_utf8) {
12469 vecsv = sv_mortalcopy(vecsv);
12470 sv_utf8_upgrade(vecsv);
12471 dotstr = SvPV_const(vecsv, dotstrlen);
12472 is_utf8 = TRUE;
12473 }
12474 vectorize = TRUE;
12475 goto tryasterisk;
12476 }
12477
12478 /* the asterisk specified a width */
12479 {
12480 int i = 0;
12481 SV *width_sv = NULL;
12482 if (args)
12483 i = va_arg(*args, int);
12484 else {
12485 ix = ix ? ix - 1 : svix++;
12486 width_sv = (ix < sv_count) ? svargs[ix]
12487 : (arg_missing = TRUE, (SV*)NULL);
12488 }
12489 width = S_sprintf_arg_num_val(aTHX_ args, i, width_sv, &left);
12490 }
12491 }
12492 else if (*q == 'v') {
12493 q++;
12494 if (vectorize)
12495 goto unknown;
12496 vectorize = TRUE;
12497 dotstr = ".";
12498 dotstrlen = 1;
12499 goto tryasterisk;
12500
12501 }
12502 else {
12503 /* explicit width? */
12504 if(*q == '0') {
12505 fill = TRUE;
12506 q++;
12507 }
12508 if (inRANGE(*q, '1', '9'))
12509 width = expect_number(&q);
12510 }
12511
12512 gotwidth:
12513
12514 /* PRECISION */
12515
12516 if (*q == '.') {
12517 q++;
12518 if (*q == '*') {
12519 STRLEN ix; /* explicit precision index */
12520 q++;
12521 if (inRANGE(*q, '1', '9')) {
12522 ix = expect_number(&q);
12523 if (*q++ == '$') {
12524 if (args)
12525 Perl_croak_nocontext(
12526 "Cannot yet reorder sv_vcatpvfn() arguments from va_list");
12527 no_redundant_warning = TRUE;
12528 } else
12529 goto unknown;
12530 }
12531 else
12532 ix = 0;
12533
12534 {
12535 int i = 0;
12536 SV *width_sv = NULL;
12537 bool neg = FALSE;
12538
12539 if (args)
12540 i = va_arg(*args, int);
12541 else {
12542 ix = ix ? ix - 1 : svix++;
12543 width_sv = (ix < sv_count) ? svargs[ix]
12544 : (arg_missing = TRUE, (SV*)NULL);
12545 }
12546 precis = S_sprintf_arg_num_val(aTHX_ args, i, width_sv, &neg);
12547 has_precis = !neg;
12548 /* ignore negative precision */
12549 if (!has_precis)
12550 precis = 0;
12551 }
12552 }
12553 else {
12554 /* although it doesn't seem documented, this code has long
12555 * behaved so that:
12556 * no digits following the '.' is treated like '.0'
12557 * the number may be preceded by any number of zeroes,
12558 * e.g. "%.0001f", which is the same as "%.1f"
12559 * so I've kept that behaviour. DAPM May 2017
12560 */
12561 while (*q == '0')
12562 q++;
12563 precis = inRANGE(*q, '1', '9') ? expect_number(&q) : 0;
12564 has_precis = TRUE;
12565 }
12566 }
12567
12568 /* SIZE */
12569
12570 switch (*q) {
12571 #ifdef WIN32
12572 case 'I': /* Ix, I32x, and I64x */
12573 # ifdef USE_64_BIT_INT
12574 if (q[1] == '6' && q[2] == '4') {
12575 q += 3;
12576 intsize = 'q';
12577 break;
12578 }
12579 # endif
12580 if (q[1] == '3' && q[2] == '2') {
12581 q += 3;
12582 break;
12583 }
12584 # ifdef USE_64_BIT_INT
12585 intsize = 'q';
12586 # endif
12587 q++;
12588 break;
12589 #endif
12590 #if (IVSIZE >= 8 || defined(HAS_LONG_DOUBLE)) || \
12591 (IVSIZE == 4 && !defined(HAS_LONG_DOUBLE))
12592 case 'L': /* Ld */
12593 /* FALLTHROUGH */
12594 # if IVSIZE >= 8
12595 case 'q': /* qd */
12596 # endif
12597 intsize = 'q';
12598 q++;
12599 break;
12600 #endif
12601 case 'l':
12602 ++q;
12603 #if (IVSIZE >= 8 || defined(HAS_LONG_DOUBLE)) || \
12604 (IVSIZE == 4 && !defined(HAS_LONG_DOUBLE))
12605 if (*q == 'l') { /* lld, llf */
12606 intsize = 'q';
12607 ++q;
12608 }
12609 else
12610 #endif
12611 intsize = 'l';
12612 break;
12613 case 'h':
12614 if (*++q == 'h') { /* hhd, hhu */
12615 intsize = 'c';
12616 ++q;
12617 }
12618 else
12619 intsize = 'h';
12620 break;
12621 #ifdef USE_QUADMATH
12622 case 'Q':
12623 #endif
12624 case 'V':
12625 case 'z':
12626 case 't':
12627 case 'j':
12628 intsize = *q++;
12629 break;
12630 }
12631
12632 /* CONVERSION */
12633
12634 c = *q++; /* c now holds the conversion type */
12635
12636 /* '%' doesn't have an arg, so skip arg processing */
12637 if (c == '%') {
12638 eptr = q - 1;
12639 elen = 1;
12640 if (vectorize)
12641 goto unknown;
12642 goto string;
12643 }
12644
12645 if (vectorize && !memCHRs("BbDdiOouUXx", c))
12646 goto unknown;
12647
12648 /* get next arg (individual branches do their own va_arg()
12649 * handling for the args case) */
12650
12651 if (!args) {
12652 efix = efix ? efix - 1 : svix++;
12653 argsv = efix < sv_count ? svargs[efix]
12654 : (arg_missing = TRUE, &PL_sv_no);
12655 }
12656
12657
12658 switch (c) {
12659
12660 /* STRINGS */
12661
12662 case 's':
12663 if (args) {
12664 eptr = va_arg(*args, char*);
12665 if (eptr)
12666 if (has_precis)
12667 elen = my_strnlen(eptr, precis);
12668 else
12669 elen = strlen(eptr);
12670 else {
12671 eptr = (char *)nullstr;
12672 elen = sizeof nullstr - 1;
12673 }
12674 }
12675 else {
12676 eptr = SvPV_const(argsv, elen);
12677 if (DO_UTF8(argsv)) {
12678 STRLEN old_precis = precis;
12679 if (has_precis && precis < elen) {
12680 STRLEN ulen = sv_or_pv_len_utf8(argsv, eptr, elen);
12681 STRLEN p = precis > ulen ? ulen : precis;
12682 precis = sv_or_pv_pos_u2b(argsv, eptr, p, 0);
12683 /* sticks at end */
12684 }
12685 if (width) { /* fudge width (can't fudge elen) */
12686 if (has_precis && precis < elen)
12687 width += precis - old_precis;
12688 else
12689 width +=
12690 elen - sv_or_pv_len_utf8(argsv,eptr,elen);
12691 }
12692 is_utf8 = TRUE;
12693 }
12694 }
12695
12696 string:
12697 if (escape_it) {
12698 U32 flags = PERL_PV_PRETTY_QUOTEDPREFIX;
12699 if (is_utf8)
12700 flags |= PERL_PV_ESCAPE_UNI;
12701
12702 if (!tmp_sv) {
12703 /* "blah"... where blah might be made up
12704 * of characters like \x{1234} */
12705 tmp_sv = newSV(1 + (PERL_QUOTEDPREFIX_LEN * 8) + 1 + 3);
12706 sv_2mortal(tmp_sv);
12707 }
12708 pv_pretty(tmp_sv, eptr, elen, PERL_QUOTEDPREFIX_LEN,
12709 NULL, NULL, flags);
12710 eptr = SvPV_const(tmp_sv, elen);
12711 }
12712 if (has_precis && precis < elen)
12713 elen = precis;
12714 break;
12715
12716 /* INTEGERS */
12717
12718 case 'p':
12719
12720 /* BEGIN NOTE
12721 *
12722 * We want to extend the C level sprintf format API with
12723 * custom formats for specific types (eg SV*) and behavior.
12724 * However some C compilers are "sprintf aware" and will
12725 * throw compile time exceptions when an illegal sprintf is
12726 * encountered, so we can't just add new format letters.
12727 *
12728 * However it turns out the length argument to the %p format
12729 * is more or less useless (the size of a pointer does not
12730 * change over time) and is not really used in the C level
12731 * code. Accordingly we can map our special behavior to
12732 * specific "length" options to the %p format. We hide these
12733 * mappings behind defines anyway, so nobody needs to know
12734 * that HEKf is actually %2p. This keeps the C compiler
12735 * happy while allowing us to add new formats.
12736 *
12737 * Note the existing logic for which number is used for what
12738 * is torturous. All negative values are used for SVf, and
12739 * non-negative values have arbitrary meanings with no
12740 * structure to them. This may change in the future.
12741 *
12742 * NEVER use the raw %p values directly. Always use the define
12743 * as the underlying mapping may change in the future.
12744 *
12745 * END NOTE
12746 *
12747 * %p extensions:
12748 *
12749 * "%...p" is normally treated like "%...x", except that the
12750 * number to print is the SV's address (or a pointer address
12751 * for C-ish sprintf).
12752 *
12753 * However, the C-ish sprintf variant allows a few special
12754 * extensions. These are currently:
12755 *
12756 * %-p (SVf) Like %s, but gets the string from an SV*
12757 * arg rather than a char* arg. Use C<SVfARG()>
12758 * to set up the argument properly.
12759 * (This was previously %_).
12760 *
12761 * %-<num>p Ditto but like %.<num>s (i.e. num is max
12762 * width), there is no escaped and quoted version
12763 * of this.
12764 *
12765 * %1p (PVf_QUOTEDPREFIX). Like raw %s, but it is escaped
12766 * and quoted.
12767 *
12768 * %5p (SVf_QUOTEDPREFIX) Like SVf, but length restricted,
12769 * escaped and quoted with pv_pretty. Intended
12770 * for error messages.
12771 *
12772 * %2p (HEKf) Like %s, but using the key string in a HEK
12773 * %7p (HEKf_QUOTEDPREFIX) ... but escaped and quoted.
12774 *
12775 * %3p (HEKf256) Ditto but like %.256s
12776 * %8p (HEKf256_QUOTEDPREFIX) ... but escaped and quoted
12777 *
12778 * %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args:
12779 * (cBOOL(utf8), len, string_buf).
12780 * It's handled by the "case 'd'" branch
12781 * rather than here.
12782 * %d%lu%9p (UTF8f_QUOTEDPREFIX) .. but escaped and quoted.
12783 *
12784 * %6p (HvNAMEf) Like %s, but using the HvNAME() and HvNAMELEN()
12785 * %10p (HvNAMEf_QUOTEDPREFIX) ... but escaped and quoted
12786 *
12787 * %<num>p where num is > 9: reserved for future
12788 * extensions. Warns, but then is treated as a
12789 * general %p (print hex address) format.
12790 *
12791 * NOTE: If you add a new magic %p value you will
12792 * need to update F<t/porting/diag.t> to be aware of it
12793 * on top of adding the various defines and etc. Do not
12794 * forget to add it to F<pod/perlguts.pod> as well.
12795 */
12796
12797 if ( args
12798 && !intsize
12799 && !fill
12800 && !plus
12801 && !has_precis
12802 /* not %*p or %*1$p - any width was explicit */
12803 && q[-2] != '*'
12804 && q[-2] != '$'
12805 ) {
12806 if (left || width == 5) { /* %-p (SVf), %-NNNp, %5p */
12807 if (left && width) {
12808 precis = width;
12809 has_precis = TRUE;
12810 } else if (width == 5) {
12811 escape_it = TRUE;
12812 }
12813 argsv = MUTABLE_SV(va_arg(*args, void*));
12814 eptr = SvPV_const(argsv, elen);
12815 if (DO_UTF8(argsv))
12816 is_utf8 = TRUE;
12817 width = 0;
12818 goto string;
12819 }
12820 else if (width == 2 || width == 3 ||
12821 width == 7 || width == 8)
12822 { /* HEKf, HEKf256, HEKf_QUOTEDPREFIX, HEKf256_QUOTEDPREFIX */
12823 HEK * const hek = va_arg(*args, HEK *);
12824 eptr = HEK_KEY(hek);
12825 elen = HEK_LEN(hek);
12826 if (HEK_UTF8(hek))
12827 is_utf8 = TRUE;
12828 if (width == 3) {
12829 precis = 256;
12830 has_precis = TRUE;
12831 }
12832 if (width > 5)
12833 escape_it = TRUE;
12834 width = 0;
12835 goto string;
12836 }
12837 else if (width == 1) {
12838 eptr = va_arg(*args,char *);
12839 elen = strlen(eptr);
12840 escape_it = TRUE;
12841 width = 0;
12842 goto string;
12843 }
12844 else if (width == 6 || width == 10) {
12845 HV *hv = va_arg(*args, HV *);
12846 eptr = HvNAME(hv);
12847 elen = HvNAMELEN(hv);
12848 if (HvNAMEUTF8(hv))
12849 is_utf8 = TRUE;
12850 if (width == 10)
12851 escape_it = TRUE;
12852 width = 0;
12853 goto string;
12854 }
12855 else if (width) {
12856 /* note width=4 or width=9 is handled under %d */
12857 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
12858 "internal %%<num>p might conflict with future printf extensions");
12859 }
12860 }
12861
12862 /* treat as normal %...p */
12863
12864 uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
12865 base = 16;
12866 c = 'x'; /* in case the format string contains '#' */
12867 goto do_integer;
12868
12869 case 'c':
12870 /* Ignore any size specifiers, since they're not documented as
12871 * being allowed for %c (ideally we should warn on e.g. '%hc').
12872 * Setting a default intsize, along with a positive
12873 * (which signals unsigned) base, causes, for C-ish use, the
12874 * va_arg to be interpreted as an unsigned int, when it's
12875 * actually signed, which will convert -ve values to high +ve
12876 * values. Note that unlike the libc %c, values > 255 will
12877 * convert to high unicode points rather than being truncated
12878 * to 8 bits. For perlish use, it will do SvUV(argsv), which
12879 * will again convert -ve args to high -ve values.
12880 */
12881 intsize = 0;
12882 base = 1; /* special value that indicates we're doing a 'c' */
12883 goto get_int_arg_val;
12884
12885 case 'D':
12886 #ifdef IV_IS_QUAD
12887 intsize = 'q';
12888 #else
12889 intsize = 'l';
12890 #endif
12891 base = -10;
12892 goto get_int_arg_val;
12893
12894 case 'd':
12895 /* probably just a plain %d, but it might be the start of the
12896 * special UTF8f format, which usually looks something like
12897 * "%d%lu%4p" (the lu may vary by platform) or
12898 * "%d%lu%9p" for an escaped version.
12899 */
12900 assert((UTF8f)[0] == 'd');
12901 assert((UTF8f)[1] == '%');
12902
12903 if ( args /* UTF8f only valid for C-ish sprintf */
12904 && q == fmtstart + 1 /* plain %d, not %....d */
12905 && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
12906 && *q == '%'
12907 && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 5)
12908 && q[sizeof(UTF8f)-3] == 'p'
12909 && (q[sizeof(UTF8f)-4] == '4' ||
12910 q[sizeof(UTF8f)-4] == '9'))
12911 {
12912 /* The argument has already gone through cBOOL, so the cast
12913 is safe. */
12914 if (q[sizeof(UTF8f)-4] == '9')
12915 escape_it = TRUE;
12916 is_utf8 = (bool)va_arg(*args, int);
12917 elen = va_arg(*args, UV);
12918 /* if utf8 length is larger than 0x7ffff..., then it might
12919 * have been a signed value that wrapped */
12920 if (elen > ((~(STRLEN)0) >> 1)) {
12921 assert(0); /* in DEBUGGING build we want to crash */
12922 elen = 0; /* otherwise we want to treat this as an empty string */
12923 }
12924 eptr = va_arg(*args, char *);
12925 q += sizeof(UTF8f) - 2;
12926 goto string;
12927 }
12928
12929 /* FALLTHROUGH */
12930 case 'i':
12931 base = -10;
12932 goto get_int_arg_val;
12933
12934 case 'U':
12935 #ifdef IV_IS_QUAD
12936 intsize = 'q';
12937 #else
12938 intsize = 'l';
12939 #endif
12940 /* FALLTHROUGH */
12941 case 'u':
12942 base = 10;
12943 goto get_int_arg_val;
12944
12945 case 'B':
12946 case 'b':
12947 base = 2;
12948 goto get_int_arg_val;
12949
12950 case 'O':
12951 #ifdef IV_IS_QUAD
12952 intsize = 'q';
12953 #else
12954 intsize = 'l';
12955 #endif
12956 /* FALLTHROUGH */
12957 case 'o':
12958 base = 8;
12959 goto get_int_arg_val;
12960
12961 case 'X':
12962 case 'x':
12963 base = 16;
12964
12965 get_int_arg_val:
12966
12967 if (vectorize) {
12968 STRLEN ulen;
12969 SV *vecsv;
12970
12971 if (base < 0) {
12972 base = -base;
12973 if (plus)
12974 esignbuf[esignlen++] = plus;
12975 }
12976
12977 /* initialise the vector string to iterate over */
12978
12979 vecsv = args ? va_arg(*args, SV*) : argsv;
12980
12981 /* if this is a version object, we need to convert
12982 * back into v-string notation and then let the
12983 * vectorize happen normally
12984 */
12985 if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) {
12986 if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) {
12987 Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF),
12988 "vector argument not supported with alpha versions");
12989 vecsv = &PL_sv_no;
12990 }
12991 else {
12992 vecstr = (U8*)SvPV_const(vecsv,veclen);
12993 vecsv = sv_newmortal();
12994 scan_vstring((char *)vecstr, (char *)vecstr + veclen,
12995 vecsv);
12996 }
12997 }
12998 vecstr = (U8*)SvPV_const(vecsv, veclen);
12999 vec_utf8 = DO_UTF8(vecsv);
13000
13001 /* This is the re-entry point for when we're iterating
13002 * over the individual characters of a vector arg */
13003 vector:
13004 if (!veclen)
13005 goto done_valid_conversion;
13006 if (vec_utf8)
13007 uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
13008 UTF8_ALLOW_ANYUV);
13009 else {
13010 uv = *vecstr;
13011 ulen = 1;
13012 }
13013 vecstr += ulen;
13014 veclen -= ulen;
13015 }
13016 else {
13017 /* test arg for inf/nan. This can trigger an unwanted
13018 * 'str' overload, so manually force 'num' overload first
13019 * if necessary */
13020 if (argsv) {
13021 SvGETMAGIC(argsv);
13022 if (UNLIKELY(SvAMAGIC(argsv)))
13023 argsv = sv_2num(argsv);
13024 if (UNLIKELY(isinfnansv(argsv)))
13025 goto handle_infnan_argsv;
13026 }
13027
13028 if (base < 0) {
13029 /* signed int type */
13030 IV iv;
13031 base = -base;
13032 if (args) {
13033 switch (intsize) {
13034 case 'c': iv = (char)va_arg(*args, int); break;
13035 case 'h': iv = (short)va_arg(*args, int); break;
13036 case 'l': iv = va_arg(*args, long); break;
13037 case 'V': iv = va_arg(*args, IV); break;
13038 case 'z': iv = va_arg(*args, SSize_t); break;
13039 #ifdef HAS_PTRDIFF_T
13040 case 't': iv = va_arg(*args, ptrdiff_t); break;
13041 #endif
13042 default: iv = va_arg(*args, int); break;
13043 case 'j': iv = (IV) va_arg(*args, PERL_INTMAX_T); break;
13044 case 'q':
13045 #if IVSIZE >= 8
13046 iv = va_arg(*args, Quad_t); break;
13047 #else
13048 goto unknown;
13049 #endif
13050 }
13051 }
13052 else {
13053 /* assign to tiv then cast to iv to work around
13054 * 2003 GCC cast bug (gnu.org bugzilla #13488) */
13055 IV tiv = SvIV_nomg(argsv);
13056 switch (intsize) {
13057 case 'c': iv = (char)tiv; break;
13058 case 'h': iv = (short)tiv; break;
13059 case 'l': iv = (long)tiv; break;
13060 case 'V':
13061 default: iv = tiv; break;
13062 case 'q':
13063 #if IVSIZE >= 8
13064 iv = (Quad_t)tiv; break;
13065 #else
13066 goto unknown;
13067 #endif
13068 }
13069 }
13070
13071 /* now convert iv to uv */
13072 if (iv >= 0) {
13073 uv = iv;
13074 if (plus)
13075 esignbuf[esignlen++] = plus;
13076 }
13077 else {
13078 /* Using 0- here to silence bogus warning from MS VC */
13079 uv = (UV) (0 - (UV) iv);
13080 esignbuf[esignlen++] = '-';
13081 }
13082 }
13083 else {
13084 /* unsigned int type */
13085 if (args) {
13086 switch (intsize) {
13087 case 'c': uv = (unsigned char)va_arg(*args, unsigned);
13088 break;
13089 case 'h': uv = (unsigned short)va_arg(*args, unsigned);
13090 break;
13091 case 'l': uv = va_arg(*args, unsigned long); break;
13092 case 'V': uv = va_arg(*args, UV); break;
13093 case 'z': uv = va_arg(*args, Size_t); break;
13094 #ifdef HAS_PTRDIFF_T
13095 /* will sign extend, but there is no
13096 * uptrdiff_t, so oh well */
13097 case 't': uv = va_arg(*args, ptrdiff_t); break;
13098 #endif
13099 case 'j': uv = (UV) va_arg(*args, PERL_UINTMAX_T); break;
13100 default: uv = va_arg(*args, unsigned); break;
13101 case 'q':
13102 #if IVSIZE >= 8
13103 uv = va_arg(*args, Uquad_t); break;
13104 #else
13105 goto unknown;
13106 #endif
13107 }
13108 }
13109 else {
13110 /* assign to tiv then cast to iv to work around
13111 * 2003 GCC cast bug (gnu.org bugzilla #13488) */
13112 UV tuv = SvUV_nomg(argsv);
13113 switch (intsize) {
13114 case 'c': uv = (unsigned char)tuv; break;
13115 case 'h': uv = (unsigned short)tuv; break;
13116 case 'l': uv = (unsigned long)tuv; break;
13117 case 'V':
13118 default: uv = tuv; break;
13119 case 'q':
13120 #if IVSIZE >= 8
13121 uv = (Uquad_t)tuv; break;
13122 #else
13123 goto unknown;
13124 #endif
13125 }
13126 }
13127 }
13128 }
13129
13130 do_integer:
13131 {
13132 char *ptr = ebuf + sizeof ebuf;
13133 unsigned dig;
13134 zeros = 0;
13135
13136 switch (base) {
13137 case 16:
13138 {
13139 const char * const p =
13140 (c == 'X') ? PL_hexdigit + 16 : PL_hexdigit;
13141
13142 do {
13143 dig = uv & 15;
13144 *--ptr = p[dig];
13145 } while (uv >>= 4);
13146 if (alt && *ptr != '0') {
13147 esignbuf[esignlen++] = '0';
13148 esignbuf[esignlen++] = c; /* 'x' or 'X' */
13149 }
13150 break;
13151 }
13152 case 8:
13153 do {
13154 dig = uv & 7;
13155 *--ptr = '0' + dig;
13156 } while (uv >>= 3);
13157 if (alt && *ptr != '0')
13158 *--ptr = '0';
13159 break;
13160 case 2:
13161 do {
13162 dig = uv & 1;
13163 *--ptr = '0' + dig;
13164 } while (uv >>= 1);
13165 if (alt && *ptr != '0') {
13166 esignbuf[esignlen++] = '0';
13167 esignbuf[esignlen++] = c; /* 'b' or 'B' */
13168 }
13169 break;
13170
13171 case 1:
13172 /* special-case: base 1 indicates a 'c' format:
13173 * we use the common code for extracting a uv,
13174 * but handle that value differently here than
13175 * all the other int types */
13176 if ((uv > 255 ||
13177 (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
13178 && !IN_BYTES)
13179 {
13180 STATIC_ASSERT_STMT(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
13181 eptr = ebuf;
13182 elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
13183 is_utf8 = TRUE;
13184 }
13185 else {
13186 eptr = ebuf;
13187 ebuf[0] = (char)uv;
13188 elen = 1;
13189 }
13190 goto string;
13191
13192 default: /* it had better be ten or less */
13193 do {
13194 dig = uv % base;
13195 *--ptr = '0' + dig;
13196 } while (uv /= base);
13197 break;
13198 }
13199 elen = (ebuf + sizeof ebuf) - ptr;
13200 eptr = ptr;
13201 if (has_precis) {
13202 if (precis > elen)
13203 zeros = precis - elen;
13204 else if (precis == 0 && elen == 1 && *eptr == '0'
13205 && !(base == 8 && alt)) /* "%#.0o" prints "0" */
13206 elen = 0;
13207
13208 /* a precision nullifies the 0 flag. */
13209 fill = FALSE;
13210 }
13211 }
13212 break;
13213
13214 /* FLOATING POINT */
13215
13216 case 'F':
13217 c = 'f'; /* maybe %F isn't supported here */
13218 /* FALLTHROUGH */
13219 case 'e': case 'E':
13220 case 'f':
13221 case 'g': case 'G':
13222 case 'a': case 'A':
13223
13224 {
13225 STRLEN float_need; /* what PL_efloatsize needs to become */
13226 bool hexfp; /* hexadecimal floating point? */
13227
13228 vcatpvfn_long_double_t fv;
13229 NV nv;
13230
13231 /* This is evil, but floating point is even more evil */
13232
13233 /* for SV-style calling, we can only get NV
13234 for C-style calling, we assume %f is double;
13235 for simplicity we allow any of %Lf, %llf, %qf for long double
13236 */
13237 switch (intsize) {
13238 #if defined(USE_QUADMATH)
13239 case 'Q':
13240 break;
13241 #endif
13242 case 'V':
13243 #if defined(USE_LONG_DOUBLE) || defined(USE_QUADMATH)
13244 intsize = 'q';
13245 #endif
13246 break;
13247 /* [perl #20339] - we should accept and ignore %lf rather than die */
13248 case 'l':
13249 /* FALLTHROUGH */
13250 default:
13251 #if defined(USE_LONG_DOUBLE) || defined(USE_QUADMATH)
13252 intsize = args ? 0 : 'q';
13253 #endif
13254 break;
13255 case 'q':
13256 #if defined(HAS_LONG_DOUBLE)
13257 break;
13258 #else
13259 /* FALLTHROUGH */
13260 #endif
13261 case 'c':
13262 case 'h':
13263 case 'z':
13264 case 't':
13265 case 'j':
13266 goto unknown;
13267 }
13268
13269 /* Now we need (long double) if intsize == 'q', else (double). */
13270 if (args) {
13271 /* Note: do not pull NVs off the va_list with va_arg()
13272 * (pull doubles instead) because if you have a build
13273 * with long doubles, you would always be pulling long
13274 * doubles, which would badly break anyone using only
13275 * doubles (i.e. the majority of builds). In other
13276 * words, you cannot mix doubles and long doubles.
13277 * The only case where you can pull off long doubles
13278 * is when the format specifier explicitly asks so with
13279 * e.g. "%Lg". */
13280 #ifdef USE_QUADMATH
13281 nv = intsize == 'Q' ? va_arg(*args, NV) :
13282 intsize == 'q' ? va_arg(*args, long double) :
13283 va_arg(*args, double);
13284 fv = nv;
13285 #elif LONG_DOUBLESIZE > DOUBLESIZE
13286 if (intsize == 'q') {
13287 fv = va_arg(*args, long double);
13288 nv = fv;
13289 } else {
13290 nv = va_arg(*args, double);
13291 VCATPVFN_NV_TO_FV(nv, fv);
13292 }
13293 #else
13294 nv = va_arg(*args, double);
13295 fv = nv;
13296 #endif
13297 }
13298 else
13299 {
13300 SvGETMAGIC(argsv);
13301 /* we jump here if an int-ish format encountered an
13302 * infinite/Nan argsv. After setting nv/fv, it falls
13303 * into the isinfnan block which follows */
13304 handle_infnan_argsv:
13305 nv = SvNV_nomg(argsv);
13306 VCATPVFN_NV_TO_FV(nv, fv);
13307 }
13308
13309 if (Perl_isinfnan(nv)) {
13310 if (c == 'c')
13311 Perl_croak(aTHX_ "Cannot printf %" NVgf " with '%c'",
13312 nv, (int)c);
13313
13314 elen = S_infnan_2pv(nv, ebuf, sizeof(ebuf), plus);
13315 assert(elen);
13316 eptr = ebuf;
13317 zeros = 0;
13318 esignlen = 0;
13319 dotstrlen = 0;
13320 break;
13321 }
13322
13323 /* special-case "%.0f" */
13324 if ( c == 'f'
13325 && !precis
13326 && has_precis
13327 && !(width || left || plus || alt)
13328 && !fill
13329 && intsize != 'q'
13330 && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
13331 )
13332 goto float_concat;
13333
13334 /* Determine the buffer size needed for the various
13335 * floating-point formats.
13336 *
13337 * The basic possibilities are:
13338 *
13339 * <---P--->
13340 * %f 1111111.123456789
13341 * %e 1.111111123e+06
13342 * %a 0x1.0f4471f9bp+20
13343 * %g 1111111.12
13344 * %g 1.11111112e+15
13345 *
13346 * where P is the value of the precision in the format, or 6
13347 * if not specified. Note the two possible output formats of
13348 * %g; in both cases the number of significant digits is <=
13349 * precision.
13350 *
13351 * For most of the format types the maximum buffer size needed
13352 * is precision, plus: any leading 1 or 0x1, the radix
13353 * point, and an exponent. The difficult one is %f: for a
13354 * large positive exponent it can have many leading digits,
13355 * which needs to be calculated specially. Also %a is slightly
13356 * different in that in the absence of a specified precision,
13357 * it uses as many digits as necessary to distinguish
13358 * different values.
13359 *
13360 * First, here are the constant bits. For ease of calculation
13361 * we over-estimate the needed buffer size, for example by
13362 * assuming all formats have an exponent and a leading 0x1.
13363 *
13364 * Also for production use, add a little extra overhead for
13365 * safety's sake. Under debugging don't, as it means we're
13366 * more likely to quickly spot issues during development.
13367 */
13368
13369 float_need = 1 /* possible unary minus */
13370 + 4 /* "0x1" plus very unlikely carry */
13371 + 1 /* default radix point '.' */
13372 + 2 /* "e-", "p+" etc */
13373 + 6 /* exponent: up to 16383 (quad fp) */
13374 #ifndef DEBUGGING
13375 + 20 /* safety net */
13376 #endif
13377 + 1; /* \0 */
13378
13379
13380 /* determine the radix point len, e.g. length(".") in "1.2" */
13381 #ifdef USE_LOCALE_NUMERIC
13382 /* note that we may either explicitly use PL_numeric_radix_sv
13383 * below, or implicitly, via an snprintf() variant.
13384 * Note also things like ps_AF.utf8 which has
13385 * "\N{ARABIC DECIMAL SEPARATOR} as a radix point */
13386 if (! have_in_lc_numeric) {
13387 in_lc_numeric = IN_LC(LC_NUMERIC);
13388 have_in_lc_numeric = TRUE;
13389 }
13390
13391 if (in_lc_numeric) {
13392 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(TRUE, {
13393 /* this can't wrap unless PL_numeric_radix_sv is a string
13394 * consuming virtually all the 32-bit or 64-bit address
13395 * space
13396 */
13397 float_need += (SvCUR(PL_numeric_radix_sv) - 1);
13398
13399 /* floating-point formats only get utf8 if the radix point
13400 * is utf8. All other characters in the string are < 128
13401 * and so can be safely appended to both a non-utf8 and utf8
13402 * string as-is.
13403 * Note that this will convert the output to utf8 even if
13404 * the radix point didn't get output.
13405 */
13406 if (SvUTF8(PL_numeric_radix_sv) && !has_utf8) {
13407 sv_utf8_upgrade(sv);
13408 has_utf8 = TRUE;
13409 }
13410 });
13411 }
13412 #endif
13413
13414 hexfp = FALSE;
13415
13416 if (isALPHA_FOLD_EQ(c, 'f')) {
13417 /* Determine how many digits before the radix point
13418 * might be emitted. frexp() (or frexpl) has some
13419 * unspecified behaviour for nan/inf/-inf, so lucky we've
13420 * already handled them above */
13421 STRLEN digits;
13422 int i = PERL_INT_MIN;
13423 (void)Perl_frexp((NV)fv, &i);
13424 if (i == PERL_INT_MIN)
13425 Perl_die(aTHX_ "panic: frexp: %" VCATPVFN_FV_GF, fv);
13426
13427 if (i > 0) {
13428 digits = BIT_DIGITS(i);
13429 /* this can't overflow. 'digits' will only be a few
13430 * thousand even for the largest floating-point types.
13431 * And up until now float_need is just some small
13432 * constants plus radix len, which can't be in
13433 * overflow territory unless the radix SV is consuming
13434 * over 1/2 the address space */
13435 assert(float_need < ((STRLEN)~0) - digits);
13436 float_need += digits;
13437 }
13438 }
13439 else if (UNLIKELY(isALPHA_FOLD_EQ(c, 'a'))) {
13440 hexfp = TRUE;
13441 if (!has_precis) {
13442 /* %a in the absence of precision may print as many
13443 * digits as needed to represent the entire mantissa
13444 * bit pattern.
13445 * This estimate seriously overshoots in most cases,
13446 * but better the undershooting. Firstly, all bytes
13447 * of the NV are not mantissa, some of them are
13448 * exponent. Secondly, for the reasonably common
13449 * long doubles case, the "80-bit extended", two
13450 * or six bytes of the NV are unused. Also, we'll
13451 * still pick up an extra +6 from the default
13452 * precision calculation below. */
13453 STRLEN digits =
13454 #ifdef LONGDOUBLE_DOUBLEDOUBLE
13455 /* For the "double double", we need more.
13456 * Since each double has their own exponent, the
13457 * doubles may float (haha) rather far from each
13458 * other, and the number of required bits is much
13459 * larger, up to total of DOUBLEDOUBLE_MAXBITS bits.
13460 * See the definition of DOUBLEDOUBLE_MAXBITS.
13461 *
13462 * Need 2 hexdigits for each byte. */
13463 (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2;
13464 #else
13465 NVSIZE * 2; /* 2 hexdigits for each byte */
13466 #endif
13467 /* see "this can't overflow" comment above */
13468 assert(float_need < ((STRLEN)~0) - digits);
13469 float_need += digits;
13470 }
13471 }
13472 /* special-case "%.<number>g" if it will fit in ebuf */
13473 else if (c == 'g'
13474 && precis /* See earlier comment about buggy Gconvert
13475 when digits, aka precis, is 0 */
13476 && has_precis
13477 /* check that "%.<number>g" formatting will fit in ebuf */
13478 && sizeof(ebuf) - float_need > precis
13479 /* sizeof(ebuf) - float_need will have wrapped if float_need > sizeof(ebuf). *
13480 * Therefore we should check that float_need < sizeof(ebuf). Normally, we would *
13481 * have run this check first, but that triggers incorrect -Wformat-overflow *
13482 * compilation warnings with some versions of gcc if Gconvert invokes sprintf(). *
13483 * ( See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89161 ) *
13484 * So, instead, we check it next: */
13485 && float_need < sizeof(ebuf)
13486 && !(width || left || plus || alt)
13487 && !fill
13488 && intsize != 'q'
13489 ) {
13490 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric,
13491 SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis)
13492 );
13493 elen = strlen(ebuf);
13494 eptr = ebuf;
13495 goto float_concat;
13496 }
13497
13498
13499 {
13500 STRLEN pr = has_precis ? precis : 6; /* known default */
13501 /* this probably can't wrap, since precis is limited
13502 * to 1/4 address space size, but better safe than sorry
13503 */
13504 if (float_need >= ((STRLEN)~0) - pr)
13505 croak_memory_wrap();
13506 float_need += pr;
13507 }
13508
13509 if (float_need < width)
13510 float_need = width;
13511
13512 if (float_need > INT_MAX) {
13513 /* snprintf() returns an int, and we use that return value,
13514 so die horribly if the expected size is too large for int
13515 */
13516 Perl_croak(aTHX_ "Numeric format result too large");
13517 }
13518
13519 if (PL_efloatsize <= float_need) {
13520 /* PL_efloatbuf should be at least 1 greater than
13521 * float_need to allow a trailing \0 to be returned by
13522 * snprintf(). If we need to grow, overgrow for the
13523 * benefit of future generations */
13524 const STRLEN extra = 0x20;
13525 if (float_need >= ((STRLEN)~0) - extra)
13526 croak_memory_wrap();
13527 float_need += extra;
13528 Safefree(PL_efloatbuf);
13529 PL_efloatsize = float_need;
13530 Newx(PL_efloatbuf, PL_efloatsize, char);
13531 PL_efloatbuf[0] = '\0';
13532 }
13533
13534 if (UNLIKELY(hexfp)) {
13535 elen = S_format_hexfp(aTHX_ PL_efloatbuf, PL_efloatsize, c,
13536 nv, fv, has_precis, precis, width,
13537 alt, plus, left, fill, in_lc_numeric);
13538 }
13539 else {
13540 char *ptr = ebuf + sizeof ebuf;
13541 *--ptr = '\0';
13542 *--ptr = c;
13543 #if defined(USE_QUADMATH)
13544 /* always use Q here. my_snprint() throws an exception if we
13545 fallthrough to the double/long double code, even when the
13546 format is correct, presumably to avoid any accidentally
13547 missing Q.
13548 */
13549 *--ptr = 'Q';
13550 /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */
13551 #elif defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl)
13552 /* Note that this is HAS_LONG_DOUBLE and PERL_PRIfldbl,
13553 * not USE_LONG_DOUBLE and NVff. In other words,
13554 * this needs to work without USE_LONG_DOUBLE. */
13555 if (intsize == 'q') {
13556 /* Copy the one or more characters in a long double
13557 * format before the 'base' ([efgEFG]) character to
13558 * the format string. */
13559 static char const ldblf[] = PERL_PRIfldbl;
13560 char const *p = ldblf + sizeof(ldblf) - 3;
13561 while (p >= ldblf) { *--ptr = *p--; }
13562 }
13563 #endif
13564 if (has_precis) {
13565 base = precis;
13566 do { *--ptr = '0' + (base % 10); } while (base /= 10);
13567 *--ptr = '.';
13568 }
13569 if (width) {
13570 base = width;
13571 do { *--ptr = '0' + (base % 10); } while (base /= 10);
13572 }
13573 if (fill)
13574 *--ptr = '0';
13575 if (left)
13576 *--ptr = '-';
13577 if (plus)
13578 *--ptr = plus;
13579 if (alt)
13580 *--ptr = '#';
13581 *--ptr = '%';
13582
13583 /* No taint. Otherwise we are in the strange situation
13584 * where printf() taints but print($float) doesn't.
13585 * --jhi */
13586
13587 /* hopefully the above makes ptr a very constrained format
13588 * that is safe to use, even though it's not literal */
13589 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
13590 #ifdef USE_QUADMATH
13591 {
13592 if (!quadmath_format_valid(ptr))
13593 Perl_croak_nocontext("panic: quadmath invalid format \"%s\"", ptr);
13594 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric,
13595 elen = quadmath_snprintf(PL_efloatbuf, PL_efloatsize,
13596 ptr, nv);
13597 );
13598 if ((IV)elen == -1) {
13599 Perl_croak_nocontext("panic: quadmath_snprintf failed, format \"%s\"", ptr);
13600 }
13601 }
13602 #elif defined(HAS_LONG_DOUBLE)
13603 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric,
13604 elen = ((intsize == 'q')
13605 ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv)
13606 : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)fv))
13607 );
13608 #else
13609 WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric,
13610 elen = my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv)
13611 );
13612 #endif
13613 GCC_DIAG_RESTORE_STMT;
13614 }
13615
13616 eptr = PL_efloatbuf;
13617
13618 float_concat:
13619
13620 /* Since floating-point formats do their own formatting and
13621 * padding, we skip the main block of code at the end of this
13622 * loop which handles appending eptr to sv, and do our own
13623 * stripped-down version */
13624
13625 assert(!zeros);
13626 assert(!esignlen);
13627 assert(elen);
13628 assert(elen >= width);
13629
13630 S_sv_catpvn_simple(aTHX_ sv, eptr, elen);
13631
13632 goto done_valid_conversion;
13633 }
13634
13635 /* SPECIAL */
13636
13637 case 'n':
13638 {
13639 STRLEN len;
13640 /* XXX ideally we should warn if any flags etc have been
13641 * set, e.g. "%-4.5n" */
13642 /* XXX if sv was originally non-utf8 with a char in the
13643 * range 0x80-0xff, then if it got upgraded, we should
13644 * calculate char len rather than byte len here */
13645 len = SvCUR(sv) - origlen;
13646 if (args) {
13647 int i = (len > PERL_INT_MAX) ? PERL_INT_MAX : (int)len;
13648
13649 switch (intsize) {
13650 case 'c': *(va_arg(*args, char*)) = i; break;
13651 case 'h': *(va_arg(*args, short*)) = i; break;
13652 default: *(va_arg(*args, int*)) = i; break;
13653 case 'l': *(va_arg(*args, long*)) = i; break;
13654 case 'V': *(va_arg(*args, IV*)) = i; break;
13655 case 'z': *(va_arg(*args, SSize_t*)) = i; break;
13656 #ifdef HAS_PTRDIFF_T
13657 case 't': *(va_arg(*args, ptrdiff_t*)) = i; break;
13658 #endif
13659 case 'j': *(va_arg(*args, PERL_INTMAX_T*)) = i; break;
13660 case 'q':
13661 #if IVSIZE >= 8
13662 *(va_arg(*args, Quad_t*)) = i; break;
13663 #else
13664 goto unknown;
13665 #endif
13666 }
13667 }
13668 else {
13669 if (arg_missing)
13670 Perl_croak_nocontext(
13671 "Missing argument for %%n in %s",
13672 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
13673 sv_setuv_mg(argsv, has_utf8
13674 ? (UV)utf8_length((U8*)SvPVX(sv), (U8*)SvEND(sv))
13675 : (UV)len);
13676 }
13677 goto done_valid_conversion;
13678 }
13679
13680 /* UNKNOWN */
13681
13682 default:
13683 unknown:
13684 if (!args
13685 && (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF)
13686 && ckWARN(WARN_PRINTF))
13687 {
13688 SV * const msg = sv_newmortal();
13689 Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ",
13690 (PL_op->op_type == OP_PRTF) ? "" : "s");
13691 if (fmtstart < patend) {
13692 const char * const fmtend = q < patend ? q : patend;
13693 const char * f;
13694 sv_catpvs(msg, "\"%");
13695 for (f = fmtstart; f < fmtend; f++) {
13696 if (isPRINT(*f)) {
13697 sv_catpvn_nomg(msg, f, 1);
13698 } else {
13699 Perl_sv_catpvf(aTHX_ msg, "\\%03o", (U8) *f);
13700 }
13701 }
13702 sv_catpvs(msg, "\"");
13703 } else {
13704 sv_catpvs(msg, "end of string");
13705 }
13706 Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%" SVf, SVfARG(msg)); /* yes, this is reentrant */
13707 }
13708
13709 /* mangled format: output the '%', then continue from the
13710 * character following that */
13711 sv_catpvn_nomg(sv, fmtstart-1, 1);
13712 q = fmtstart;
13713 svix = osvix;
13714 /* Any "redundant arg" warning from now onwards will probably
13715 * just be misleading, so don't bother. */
13716 no_redundant_warning = TRUE;
13717 continue; /* not "break" */
13718 }
13719
13720 if (is_utf8 != has_utf8) {
13721 if (is_utf8) {
13722 if (SvCUR(sv))
13723 sv_utf8_upgrade(sv);
13724 }
13725 else {
13726 const STRLEN old_elen = elen;
13727 SV * const nsv = newSVpvn_flags(eptr, elen, SVs_TEMP);
13728 sv_utf8_upgrade(nsv);
13729 eptr = SvPVX_const(nsv);
13730 elen = SvCUR(nsv);
13731
13732 if (width) { /* fudge width (can't fudge elen) */
13733 width += elen - old_elen;
13734 }
13735 is_utf8 = TRUE;
13736 }
13737 }
13738
13739
13740 /* append esignbuf, filler, zeros, eptr and dotstr to sv */
13741
13742 {
13743 STRLEN need, have, gap;
13744 STRLEN i;
13745 char *s;
13746
13747 /* signed value that's wrapped? */
13748 assert(elen <= ((~(STRLEN)0) >> 1));
13749
13750 /* if zeros is non-zero, then it represents filler between
13751 * elen and precis. So adding elen and zeros together will
13752 * always be <= precis, and the addition can never wrap */
13753 assert(!zeros || (precis > elen && precis - elen == zeros));
13754 have = elen + zeros;
13755
13756 if (have >= (((STRLEN)~0) - esignlen))
13757 croak_memory_wrap();
13758 have += esignlen;
13759
13760 need = (have > width ? have : width);
13761 gap = need - have;
13762
13763 if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1)))
13764 croak_memory_wrap();
13765 need += (SvCUR(sv) + 1);
13766
13767 SvGROW(sv, need);
13768
13769 s = SvEND(sv);
13770
13771 if (left) {
13772 for (i = 0; i < esignlen; i++)
13773 *s++ = esignbuf[i];
13774 for (i = zeros; i; i--)
13775 *s++ = '0';
13776 Copy(eptr, s, elen, char);
13777 s += elen;
13778 for (i = gap; i; i--)
13779 *s++ = ' ';
13780 }
13781 else {
13782 if (fill) {
13783 for (i = 0; i < esignlen; i++)
13784 *s++ = esignbuf[i];
13785 assert(!zeros);
13786 zeros = gap;
13787 }
13788 else {
13789 for (i = gap; i; i--)
13790 *s++ = ' ';
13791 for (i = 0; i < esignlen; i++)
13792 *s++ = esignbuf[i];
13793 }
13794
13795 for (i = zeros; i; i--)
13796 *s++ = '0';
13797 Copy(eptr, s, elen, char);
13798 s += elen;
13799 }
13800
13801 *s = '\0';
13802 SvCUR_set(sv, s - SvPVX_const(sv));
13803
13804 if (is_utf8)
13805 has_utf8 = TRUE;
13806 if (has_utf8)
13807 SvUTF8_on(sv);
13808 }
13809
13810 if (vectorize && veclen) {
13811 /* we append the vector separator separately since %v isn't
13812 * very common: don't slow down the general case by adding
13813 * dotstrlen to need etc */
13814 sv_catpvn_nomg(sv, dotstr, dotstrlen);
13815 esignlen = 0;
13816 goto vector; /* do next iteration */
13817 }
13818
13819 done_valid_conversion:
13820
13821 if (arg_missing)
13822 S_warn_vcatpvfn_missing_argument(aTHX);
13823 }
13824
13825 /* Now that we've consumed all our printf format arguments (svix)
13826 * do we have things left on the stack that we didn't use?
13827 */
13828 if (!no_redundant_warning && sv_count >= svix + 1 && ckWARN(WARN_REDUNDANT)) {
13829 Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s",
13830 PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
13831 }
13832
13833 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
13834 /* while we shouldn't set the cache, it may have been previously
13835 set in the caller, so clear it */
13836 MAGIC *mg = mg_find(sv, PERL_MAGIC_utf8);
13837 if (mg)
13838 magic_setutf8(sv,mg); /* clear UTF8 cache */
13839 }
13840 SvTAINT(sv);
13841 }
13842
13843 /* =========================================================================
13844
13845 =for apidoc_section $embedding
13846
13847 =cut
13848
13849 All the macros and functions in this section are for the private use of
13850 the main function, perl_clone().
13851
13852 The foo_dup() functions make an exact copy of an existing foo thingy.
13853 During the course of a cloning, a hash table is used to map old addresses
13854 to new addresses. The table is created and manipulated with the
13855 ptr_table_* functions.
13856
13857 * =========================================================================*/
13858
13859
13860 #if defined(USE_ITHREADS)
13861
13862 /* XXX Remove this so it doesn't have to go thru the macro and return for nothing */
13863 #ifndef GpREFCNT_inc
13864 # define GpREFCNT_inc(gp) ((gp) ? (++(gp)->gp_refcnt, (gp)) : (GP*)NULL)
13865 #endif
13866
13867
13868 #define SAVEPV(p) ((p) ? savepv(p) : NULL)
13869 #define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
13870
13871 /* clone a parser */
13872
13873 yy_parser *
13874 Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
13875 {
13876 yy_parser *parser;
13877
13878 PERL_ARGS_ASSERT_PARSER_DUP;
13879
13880 if (!proto)
13881 return NULL;
13882
13883 /* look for it in the table first */
13884 parser = (yy_parser *)ptr_table_fetch(PL_ptr_table, proto);
13885 if (parser)
13886 return parser;
13887
13888 /* create anew and remember what it is */
13889 Newxz(parser, 1, yy_parser);
13890 ptr_table_store(PL_ptr_table, proto, parser);
13891
13892 /* XXX eventually, just Copy() most of the parser struct ? */
13893
13894 parser->lex_brackets = proto->lex_brackets;
13895 parser->lex_casemods = proto->lex_casemods;
13896 parser->lex_brackstack = savepvn(proto->lex_brackstack,
13897 (proto->lex_brackets < 120 ? 120 : proto->lex_brackets));
13898 parser->lex_casestack = savepvn(proto->lex_casestack,
13899 (proto->lex_casemods < 12 ? 12 : proto->lex_casemods));
13900 parser->lex_defer = proto->lex_defer;
13901 parser->lex_dojoin = proto->lex_dojoin;
13902 parser->lex_formbrack = proto->lex_formbrack;
13903 parser->lex_inpat = proto->lex_inpat;
13904 parser->lex_inwhat = proto->lex_inwhat;
13905 parser->lex_op = proto->lex_op;
13906 parser->lex_repl = sv_dup_inc(proto->lex_repl, param);
13907 parser->lex_starts = proto->lex_starts;
13908 parser->lex_stuff = sv_dup_inc(proto->lex_stuff, param);
13909 parser->multi_close = proto->multi_close;
13910 parser->multi_open = proto->multi_open;
13911 parser->multi_start = proto->multi_start;
13912 parser->multi_end = proto->multi_end;
13913 parser->preambled = proto->preambled;
13914 parser->lex_super_state = proto->lex_super_state;
13915 parser->lex_sub_inwhat = proto->lex_sub_inwhat;
13916 parser->lex_sub_op = proto->lex_sub_op;
13917 parser->lex_sub_repl= sv_dup_inc(proto->lex_sub_repl, param);
13918 parser->linestr = sv_dup_inc(proto->linestr, param);
13919 parser->expect = proto->expect;
13920 parser->copline = proto->copline;
13921 parser->last_lop_op = proto->last_lop_op;
13922 parser->lex_state = proto->lex_state;
13923 parser->rsfp = fp_dup(proto->rsfp, '<', param);
13924 /* rsfp_filters entries have fake IoDIRP() */
13925 parser->rsfp_filters= av_dup_inc(proto->rsfp_filters, param);
13926 parser->in_my = proto->in_my;
13927 parser->in_my_stash = hv_dup(proto->in_my_stash, param);
13928 parser->error_count = proto->error_count;
13929 parser->sig_elems = proto->sig_elems;
13930 parser->sig_optelems= proto->sig_optelems;
13931 parser->sig_slurpy = proto->sig_slurpy;
13932 parser->recheck_utf8_validity = proto->recheck_utf8_validity;
13933
13934 {
13935 char * const ols = SvPVX(proto->linestr);
13936 char * const ls = SvPVX(parser->linestr);
13937
13938 parser->bufptr = ls + (proto->bufptr >= ols ?
13939 proto->bufptr - ols : 0);
13940 parser->oldbufptr = ls + (proto->oldbufptr >= ols ?
13941 proto->oldbufptr - ols : 0);
13942 parser->oldoldbufptr= ls + (proto->oldoldbufptr >= ols ?
13943 proto->oldoldbufptr - ols : 0);
13944 parser->linestart = ls + (proto->linestart >= ols ?
13945 proto->linestart - ols : 0);
13946 parser->last_uni = ls + (proto->last_uni >= ols ?
13947 proto->last_uni - ols : 0);
13948 parser->last_lop = ls + (proto->last_lop >= ols ?
13949 proto->last_lop - ols : 0);
13950
13951 parser->bufend = ls + SvCUR(parser->linestr);
13952 }
13953
13954 Copy(proto->tokenbuf, parser->tokenbuf, 256, char);
13955
13956
13957 Copy(proto->nextval, parser->nextval, 5, YYSTYPE);
13958 Copy(proto->nexttype, parser->nexttype, 5, I32);
13959 parser->nexttoke = proto->nexttoke;
13960
13961 /* XXX should clone saved_curcop here, but we aren't passed
13962 * proto_perl; so do it in perl_clone_using instead */
13963
13964 return parser;
13965 }
13966
13967 /*
13968 =for apidoc_section $io
13969 =for apidoc fp_dup
13970
13971 Duplicate a file handle, returning a pointer to the cloned object.
13972
13973 =cut
13974 */
13975
13976 PerlIO *
13977 Perl_fp_dup(pTHX_ PerlIO *const fp, const char type, CLONE_PARAMS *const param)
13978 {
13979 PerlIO *ret;
13980
13981 PERL_ARGS_ASSERT_FP_DUP;
13982 PERL_UNUSED_ARG(type);
13983
13984 if (!fp)
13985 return (PerlIO*)NULL;
13986
13987 /* look for it in the table first */
13988 ret = (PerlIO*)ptr_table_fetch(PL_ptr_table, fp);
13989 if (ret)
13990 return ret;
13991
13992 /* create anew and remember what it is */
13993 #ifdef __amigaos4__
13994 ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE|PERLIO_DUP_FD);
13995 #else
13996 ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE);
13997 #endif
13998 ptr_table_store(PL_ptr_table, fp, ret);
13999 return ret;
14000 }
14001
14002 /*
14003 =for apidoc_section $io
14004 =for apidoc dirp_dup
14005
14006 Duplicate a directory handle, returning a pointer to the cloned object.
14007
14008 =cut
14009 */
14010
14011 DIR *
14012 Perl_dirp_dup(pTHX_ DIR *const dp, CLONE_PARAMS *const param)
14013 {
14014 DIR *ret;
14015
14016 #if defined(HAS_FCHDIR) && defined(HAS_TELLDIR) && defined(HAS_SEEKDIR)
14017 DIR *pwd;
14018 const Direntry_t *dirent;
14019 char smallbuf[256]; /* XXX MAXPATHLEN, surely? */
14020 char *name = NULL;
14021 STRLEN len = 0;
14022 long pos;
14023 #endif
14024
14025 PERL_UNUSED_CONTEXT;
14026 PERL_ARGS_ASSERT_DIRP_DUP;
14027
14028 if (!dp)
14029 return (DIR*)NULL;
14030
14031 /* look for it in the table first */
14032 ret = (DIR*)ptr_table_fetch(PL_ptr_table, dp);
14033 if (ret)
14034 return ret;
14035
14036 #if defined(HAS_FCHDIR) && defined(HAS_TELLDIR) && defined(HAS_SEEKDIR)
14037
14038 PERL_UNUSED_ARG(param);
14039
14040 /* create anew */
14041
14042 /* open the current directory (so we can switch back) */
14043 if (!(pwd = PerlDir_open("."))) return (DIR *)NULL;
14044
14045 /* chdir to our dir handle and open the present working directory */
14046 if (fchdir(my_dirfd(dp)) < 0 || !(ret = PerlDir_open("."))) {
14047 PerlDir_close(pwd);
14048 return (DIR *)NULL;
14049 }
14050 /* Now we should have two dir handles pointing to the same dir. */
14051
14052 /* Be nice to the calling code and chdir back to where we were. */
14053 /* XXX If this fails, then what? */
14054 PERL_UNUSED_RESULT(fchdir(my_dirfd(pwd)));
14055
14056 /* We have no need of the pwd handle any more. */
14057 PerlDir_close(pwd);
14058
14059 #ifdef DIRNAMLEN
14060 # define d_namlen(d) (d)->d_namlen
14061 #else
14062 # define d_namlen(d) strlen((d)->d_name)
14063 #endif
14064 /* Iterate once through dp, to get the file name at the current posi-
14065 tion. Then step back. */
14066 pos = PerlDir_tell(dp);
14067 if ((dirent = PerlDir_read(dp))) {
14068 len = d_namlen(dirent);
14069 if (len > sizeof(dirent->d_name) && sizeof(dirent->d_name) > PTRSIZE) {
14070 /* If the len is somehow magically longer than the
14071 * maximum length of the directory entry, even though
14072 * we could fit it in a buffer, we could not copy it
14073 * from the dirent. Bail out. */
14074 PerlDir_close(ret);
14075 return (DIR*)NULL;
14076 }
14077 if (len <= sizeof smallbuf) name = smallbuf;
14078 else Newx(name, len, char);
14079 Move(dirent->d_name, name, len, char);
14080 }
14081 PerlDir_seek(dp, pos);
14082
14083 /* Iterate through the new dir handle, till we find a file with the
14084 right name. */
14085 if (!dirent) /* just before the end */
14086 for(;;) {
14087 pos = PerlDir_tell(ret);
14088 if (PerlDir_read(ret)) continue; /* not there yet */
14089 PerlDir_seek(ret, pos); /* step back */
14090 break;
14091 }
14092 else {
14093 const long pos0 = PerlDir_tell(ret);
14094 for(;;) {
14095 pos = PerlDir_tell(ret);
14096 if ((dirent = PerlDir_read(ret))) {
14097 if (len == (STRLEN)d_namlen(dirent)
14098 && memEQ(name, dirent->d_name, len)) {
14099 /* found it */
14100 PerlDir_seek(ret, pos); /* step back */
14101 break;
14102 }
14103 /* else we are not there yet; keep iterating */
14104 }
14105 else { /* This is not meant to happen. The best we can do is
14106 reset the iterator to the beginning. */
14107 PerlDir_seek(ret, pos0);
14108 break;
14109 }
14110 }
14111 }
14112 #undef d_namlen
14113
14114 if (name && name != smallbuf)
14115 Safefree(name);
14116 #endif
14117
14118 #ifdef WIN32
14119 ret = win32_dirp_dup(dp, param);
14120 #endif
14121
14122 /* pop it in the pointer table */
14123 if (ret)
14124 ptr_table_store(PL_ptr_table, dp, ret);
14125
14126 return ret;
14127 }
14128
14129 /*
14130 =for apidoc_section $GV
14131 =for apidoc gp_dup
14132
14133 Duplicate a typeglob, returning a pointer to the cloned object.
14134
14135 =cut
14136 */
14137
14138 GP *
14139 Perl_gp_dup(pTHX_ GP *const gp, CLONE_PARAMS *const param)
14140 {
14141 GP *ret;
14142
14143 PERL_ARGS_ASSERT_GP_DUP;
14144
14145 if (!gp)
14146 return (GP*)NULL;
14147 /* look for it in the table first */
14148 ret = (GP*)ptr_table_fetch(PL_ptr_table, gp);
14149 if (ret)
14150 return ret;
14151
14152 /* create anew and remember what it is */
14153 Newxz(ret, 1, GP);
14154 ptr_table_store(PL_ptr_table, gp, ret);
14155
14156 /* clone */
14157 /* ret->gp_refcnt must be 0 before any other dups are called. We're relying
14158 on Newxz() to do this for us. */
14159 ret->gp_sv = sv_dup_inc(gp->gp_sv, param);
14160 ret->gp_io = io_dup_inc(gp->gp_io, param);
14161 ret->gp_form = cv_dup_inc(gp->gp_form, param);
14162 ret->gp_av = av_dup_inc(gp->gp_av, param);
14163 ret->gp_hv = hv_dup_inc(gp->gp_hv, param);
14164 ret->gp_egv = gv_dup(gp->gp_egv, param);/* GvEGV is not refcounted */
14165 ret->gp_cv = cv_dup_inc(gp->gp_cv, param);
14166 ret->gp_cvgen = gp->gp_cvgen;
14167 ret->gp_line = gp->gp_line;
14168 ret->gp_file_hek = hek_dup(gp->gp_file_hek, param);
14169 return ret;
14170 }
14171
14172
14173 /*
14174 =for apidoc_section $magic
14175 =for apidoc mg_dup
14176
14177 Duplicate a chain of magic, returning a pointer to the cloned object.
14178
14179 =cut
14180 */
14181
14182 MAGIC *
14183 Perl_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS *const param)
14184 {
14185 MAGIC *mgret = NULL;
14186 MAGIC **mgprev_p = &mgret;
14187
14188 PERL_ARGS_ASSERT_MG_DUP;
14189
14190 for (; mg; mg = mg->mg_moremagic) {
14191 MAGIC *nmg;
14192
14193 if ((param->flags & CLONEf_JOIN_IN)
14194 && mg->mg_type == PERL_MAGIC_backref)
14195 /* when joining, we let the individual SVs add themselves to
14196 * backref as needed. */
14197 continue;
14198
14199 Newx(nmg, 1, MAGIC);
14200 *mgprev_p = nmg;
14201 mgprev_p = &(nmg->mg_moremagic);
14202
14203 /* There was a comment "XXX copy dynamic vtable?" but as we don't have
14204 dynamic vtables, I'm not sure why Sarathy wrote it. The comment dates
14205 from the original commit adding Perl_mg_dup() - revision 4538.
14206 Similarly there is the annotation "XXX random ptr?" next to the
14207 assignment to nmg->mg_ptr. */
14208 *nmg = *mg;
14209
14210 /* FIXME for plugins
14211 if (nmg->mg_type == PERL_MAGIC_qr) {
14212 nmg->mg_obj = MUTABLE_SV(CALLREGDUPE((REGEXP*)nmg->mg_obj, param));
14213 }
14214 else
14215 */
14216 nmg->mg_obj = (nmg->mg_flags & MGf_REFCOUNTED)
14217 ? nmg->mg_type == PERL_MAGIC_backref
14218 /* The backref AV has its reference
14219 * count deliberately bumped by 1 */
14220 ? SvREFCNT_inc(av_dup_inc((const AV *)
14221 nmg->mg_obj, param))
14222 : sv_dup_inc(nmg->mg_obj, param)
14223 : (nmg->mg_type == PERL_MAGIC_regdatum ||
14224 nmg->mg_type == PERL_MAGIC_regdata)
14225 ? nmg->mg_obj
14226 : sv_dup(nmg->mg_obj, param);
14227
14228 if (nmg->mg_ptr && nmg->mg_type != PERL_MAGIC_regex_global) {
14229 if (nmg->mg_len > 0) {
14230 nmg->mg_ptr = SAVEPVN(nmg->mg_ptr, nmg->mg_len);
14231 if (nmg->mg_type == PERL_MAGIC_overload_table &&
14232 AMT_AMAGIC((AMT*)nmg->mg_ptr))
14233 {
14234 AMT * const namtp = (AMT*)nmg->mg_ptr;
14235 sv_dup_inc_multiple((SV**)(namtp->table),
14236 (SV**)(namtp->table), NofAMmeth, param);
14237 }
14238 }
14239 else if (nmg->mg_len == HEf_SVKEY)
14240 nmg->mg_ptr = (char*)sv_dup_inc((const SV *)nmg->mg_ptr, param);
14241 }
14242 if ((nmg->mg_flags & MGf_DUP) && nmg->mg_virtual && nmg->mg_virtual->svt_dup) {
14243 nmg->mg_virtual->svt_dup(aTHX_ nmg, param);
14244 }
14245 }
14246 return mgret;
14247 }
14248
14249 #endif /* USE_ITHREADS */
14250
14251 struct ptr_tbl_arena {
14252 struct ptr_tbl_arena *next;
14253 struct ptr_tbl_ent array[1023/3]; /* as ptr_tbl_ent has 3 pointers. */
14254 };
14255
14256 /*
14257 =for apidoc_section $embedding
14258 =for apidoc ptr_table_new
14259
14260 Create a new pointer-mapping table
14261
14262 =cut
14263 */
14264
14265 PTR_TBL_t *
14266 Perl_ptr_table_new(pTHX)
14267 {
14268 PTR_TBL_t *tbl;
14269 PERL_UNUSED_CONTEXT;
14270
14271 Newx(tbl, 1, PTR_TBL_t);
14272 tbl->tbl_max = 511;
14273 tbl->tbl_items = 0;
14274 tbl->tbl_arena = NULL;
14275 tbl->tbl_arena_next = NULL;
14276 tbl->tbl_arena_end = NULL;
14277 Newxz(tbl->tbl_ary, tbl->tbl_max + 1, PTR_TBL_ENT_t*);
14278 return tbl;
14279 }
14280
14281 #define PTR_TABLE_HASH(ptr) \
14282 ((PTR2UV(ptr) >> 3) ^ (PTR2UV(ptr) >> (3 + 7)) ^ (PTR2UV(ptr) >> (3 + 17)))
14283
14284 /* map an existing pointer using a table */
14285
14286 STATIC PTR_TBL_ENT_t *
14287 S_ptr_table_find(PTR_TBL_t *const tbl, const void *const sv)
14288 {
14289 PTR_TBL_ENT_t *tblent;
14290 const UV hash = PTR_TABLE_HASH(sv);
14291
14292 PERL_ARGS_ASSERT_PTR_TABLE_FIND;
14293
14294 tblent = tbl->tbl_ary[hash & tbl->tbl_max];
14295 for (; tblent; tblent = tblent->next) {
14296 if (tblent->oldval == sv)
14297 return tblent;
14298 }
14299 return NULL;
14300 }
14301
14302 /*
14303 =for apidoc ptr_table_fetch
14304
14305 Look for C<sv> in the pointer-mapping table C<tbl>, returning its value, or
14306 NULL if not found.
14307
14308 =cut
14309 */
14310
14311 void *
14312 Perl_ptr_table_fetch(pTHX_ PTR_TBL_t *const tbl, const void *const sv)
14313 {
14314 PTR_TBL_ENT_t const *const tblent = ptr_table_find(tbl, sv);
14315
14316 PERL_ARGS_ASSERT_PTR_TABLE_FETCH;
14317 PERL_UNUSED_CONTEXT;
14318
14319 return tblent ? tblent->newval : NULL;
14320 }
14321
14322 /*
14323 =for apidoc ptr_table_store
14324
14325 Add a new entry to a pointer-mapping table C<tbl>.
14326 In hash terms, C<oldsv> is the key; Cnewsv> is the value.
14327
14328 The names "old" and "new" are specific to the core's typical use of ptr_tables
14329 in thread cloning.
14330
14331 =cut
14332 */
14333
14334 void
14335 Perl_ptr_table_store(pTHX_ PTR_TBL_t *const tbl, const void *const oldsv, void *const newsv)
14336 {
14337 PTR_TBL_ENT_t *tblent = ptr_table_find(tbl, oldsv);
14338
14339 PERL_ARGS_ASSERT_PTR_TABLE_STORE;
14340 PERL_UNUSED_CONTEXT;
14341
14342 if (tblent) {
14343 tblent->newval = newsv;
14344 } else {
14345 const UV entry = PTR_TABLE_HASH(oldsv) & tbl->tbl_max;
14346
14347 if (tbl->tbl_arena_next == tbl->tbl_arena_end) {
14348 struct ptr_tbl_arena *new_arena;
14349
14350 Newx(new_arena, 1, struct ptr_tbl_arena);
14351 new_arena->next = tbl->tbl_arena;
14352 tbl->tbl_arena = new_arena;
14353 tbl->tbl_arena_next = new_arena->array;
14354 tbl->tbl_arena_end = C_ARRAY_END(new_arena->array);
14355 }
14356
14357 tblent = tbl->tbl_arena_next++;
14358
14359 tblent->oldval = oldsv;
14360 tblent->newval = newsv;
14361 tblent->next = tbl->tbl_ary[entry];
14362 tbl->tbl_ary[entry] = tblent;
14363 tbl->tbl_items++;
14364 if (tblent->next && tbl->tbl_items > tbl->tbl_max)
14365 ptr_table_split(tbl);
14366 }
14367 }
14368
14369 /*
14370 =for apidoc ptr_table_split
14371
14372 Double the hash bucket size of an existing ptr table
14373
14374 =cut
14375 */
14376
14377 void
14378 Perl_ptr_table_split(pTHX_ PTR_TBL_t *const tbl)
14379 {
14380 PTR_TBL_ENT_t **ary = tbl->tbl_ary;
14381 const UV oldsize = tbl->tbl_max + 1;
14382 UV newsize = oldsize * 2;
14383 UV i;
14384
14385 PERL_ARGS_ASSERT_PTR_TABLE_SPLIT;
14386 PERL_UNUSED_CONTEXT;
14387
14388 Renew(ary, newsize, PTR_TBL_ENT_t*);
14389 Zero(&ary[oldsize], newsize-oldsize, PTR_TBL_ENT_t*);
14390 tbl->tbl_max = --newsize;
14391 tbl->tbl_ary = ary;
14392 for (i=0; i < oldsize; i++, ary++) {
14393 PTR_TBL_ENT_t **entp = ary;
14394 PTR_TBL_ENT_t *ent = *ary;
14395 PTR_TBL_ENT_t **curentp;
14396 if (!ent)
14397 continue;
14398 curentp = ary + oldsize;
14399 do {
14400 if ((newsize & PTR_TABLE_HASH(ent->oldval)) != i) {
14401 *entp = ent->next;
14402 ent->next = *curentp;
14403 *curentp = ent;
14404 }
14405 else
14406 entp = &ent->next;
14407 ent = *entp;
14408 } while (ent);
14409 }
14410 }
14411
14412 /*
14413 =for apidoc ptr_table_free
14414
14415 Clear and free a ptr table
14416
14417 =cut
14418 */
14419
14420 void
14421 Perl_ptr_table_free(pTHX_ PTR_TBL_t *const tbl)
14422 {
14423 struct ptr_tbl_arena *arena;
14424
14425 PERL_UNUSED_CONTEXT;
14426
14427 if (!tbl) {
14428 return;
14429 }
14430
14431 arena = tbl->tbl_arena;
14432
14433 while (arena) {
14434 struct ptr_tbl_arena *next = arena->next;
14435
14436 Safefree(arena);
14437 arena = next;
14438 }
14439
14440 Safefree(tbl->tbl_ary);
14441 Safefree(tbl);
14442 }
14443
14444 #if defined(USE_ITHREADS)
14445
14446 void
14447 Perl_rvpv_dup(pTHX_ SV *const dsv, const SV *const ssv, CLONE_PARAMS *const param)
14448 {
14449 PERL_ARGS_ASSERT_RVPV_DUP;
14450
14451 assert(!isREGEXP(ssv));
14452 if (SvROK(ssv)) {
14453 if (SvWEAKREF(ssv)) {
14454 SvRV_set(dsv, sv_dup(SvRV_const(ssv), param));
14455 if (param->flags & CLONEf_JOIN_IN) {
14456 /* if joining, we add any back references individually rather
14457 * than copying the whole backref array */
14458 Perl_sv_add_backref(aTHX_ SvRV(dsv), dsv);
14459 }
14460 }
14461 else
14462 SvRV_set(dsv, sv_dup_inc(SvRV_const(ssv), param));
14463 }
14464 else if (SvPVX_const(ssv)) {
14465 /* Has something there */
14466 if (SvLEN(ssv)) {
14467 /* Normal PV - clone whole allocated space */
14468 SvPV_set(dsv, SAVEPVN(SvPVX_const(ssv), SvLEN(ssv)-1));
14469 /* ssv may not be that normal, but actually copy on write.
14470 But we are a true, independent SV, so: */
14471 SvIsCOW_off(dsv);
14472 }
14473 else {
14474 /* Special case - not normally malloced for some reason */
14475 if (isGV_with_GP(ssv)) {
14476 /* Don't need to do anything here. */
14477 }
14478 else if ((SvIsCOW_shared_hash(ssv))) {
14479 /* A "shared" PV - clone it as "shared" PV */
14480 SvPV_set(dsv,
14481 HEK_KEY(hek_dup(SvSHARED_HEK_FROM_PV(SvPVX_const(ssv)),
14482 param)));
14483 }
14484 else {
14485 /* Some other special case - random pointer */
14486 SvPV_set(dsv, (char *) SvPVX_const(ssv));
14487 }
14488 }
14489 }
14490 else {
14491 /* Copy the NULL */
14492 SvPV_set(dsv, NULL);
14493 }
14494 }
14495
14496 /* duplicate a list of SVs. source and dest may point to the same memory. */
14497 static SV **
14498 S_sv_dup_inc_multiple(pTHX_ SV *const *source, SV **dest,
14499 SSize_t items, CLONE_PARAMS *const param)
14500 {
14501 PERL_ARGS_ASSERT_SV_DUP_INC_MULTIPLE;
14502
14503 while (items-- > 0) {
14504 *dest++ = sv_dup_inc(*source++, param);
14505 }
14506
14507 return dest;
14508 }
14509
14510 /* duplicate the HvAUX of an HV */
14511 static void
14512 S_sv_dup_hvaux(pTHX_ const SV *const ssv, SV *dsv, CLONE_PARAMS *const param)
14513 {
14514 PERL_ARGS_ASSERT_SV_DUP_HVAUX;
14515
14516 const struct xpvhv_aux * const saux = HvAUX(ssv);
14517 struct xpvhv_aux * const daux = HvAUX(dsv);
14518 /* This flag isn't copied. */
14519 SvFLAGS(dsv) |= SVphv_HasAUX;
14520
14521 if (saux->xhv_name_count) {
14522 HEK ** const sname = saux->xhv_name_u.xhvnameu_names;
14523 const I32 count = saux->xhv_name_count < 0
14524 ? -saux->xhv_name_count
14525 : saux->xhv_name_count;
14526 HEK **shekp = sname + count;
14527 HEK **dhekp;
14528 Newx(daux->xhv_name_u.xhvnameu_names, count, HEK *);
14529 dhekp = daux->xhv_name_u.xhvnameu_names + count;
14530 while (shekp-- > sname) {
14531 dhekp--;
14532 *dhekp = hek_dup(*shekp, param);
14533 }
14534 }
14535 else {
14536 daux->xhv_name_u.xhvnameu_name = hek_dup(saux->xhv_name_u.xhvnameu_name, param);
14537 }
14538 daux->xhv_name_count = saux->xhv_name_count;
14539
14540 daux->xhv_aux_flags = saux->xhv_aux_flags;
14541 #ifdef PERL_HASH_RANDOMIZE_KEYS
14542 daux->xhv_rand = saux->xhv_rand;
14543 daux->xhv_last_rand = saux->xhv_last_rand;
14544 #endif
14545 daux->xhv_riter = saux->xhv_riter;
14546 daux->xhv_eiter = saux->xhv_eiter ? he_dup(saux->xhv_eiter, FALSE, param) : 0;
14547 /* backref array needs refcnt=2; see sv_add_backref */
14548 daux->xhv_backreferences =
14549 (param->flags & CLONEf_JOIN_IN)
14550 /* when joining, we let the individual GVs and
14551 * CVs add themselves to backref as
14552 * needed. This avoids pulling in stuff
14553 * that isn't required, and simplifies the
14554 * case where stashes aren't cloned back
14555 * if they already exist in the parent
14556 * thread */
14557 ? NULL
14558 : saux->xhv_backreferences
14559 ? (SvTYPE(saux->xhv_backreferences) == SVt_PVAV)
14560 ? MUTABLE_AV(SvREFCNT_inc(
14561 sv_dup_inc((const SV *)
14562 saux->xhv_backreferences, param)))
14563 : MUTABLE_AV(sv_dup((const SV *)
14564 saux->xhv_backreferences, param))
14565 : 0;
14566
14567 daux->xhv_mro_meta = saux->xhv_mro_meta
14568 ? mro_meta_dup(saux->xhv_mro_meta, param)
14569 : 0;
14570
14571 /* Record stashes for possible cloning in Perl_clone(). */
14572 if (HvNAME(ssv))
14573 av_push(param->stashes, dsv);
14574
14575 if (HvSTASH_IS_CLASS(ssv)) {
14576 daux->xhv_class_superclass = hv_dup_inc(saux->xhv_class_superclass, param);
14577 daux->xhv_class_initfields_cv = cv_dup_inc(saux->xhv_class_initfields_cv, param);
14578 daux->xhv_class_adjust_blocks = av_dup_inc(saux->xhv_class_adjust_blocks, param);
14579 daux->xhv_class_fields = padnamelist_dup_inc(saux->xhv_class_fields, param);
14580 daux->xhv_class_next_fieldix = saux->xhv_class_next_fieldix;
14581 daux->xhv_class_param_map = hv_dup_inc(saux->xhv_class_param_map, param);
14582
14583 /* TODO: This does mean that we can't compile more `field` expressions
14584 * in the cloned thread, but surely we're done with compiletime now..?
14585 */
14586 daux->xhv_class_suspended_initfields_compcv = NULL;
14587 }
14588 }
14589
14590 /* duplicate an SV of any type (including AV, HV etc) */
14591
14592 static SV *
14593 S_sv_dup_common(pTHX_ const SV *const ssv, CLONE_PARAMS *const param)
14594 {
14595 SV *dsv;
14596
14597 PERL_ARGS_ASSERT_SV_DUP_COMMON;
14598
14599 if (SvIS_FREED(ssv)) {
14600 #ifdef DEBUG_LEAKING_SCALARS_ABORT
14601 abort();
14602 #endif
14603 return NULL;
14604 }
14605 /* look for it in the table first */
14606 dsv = MUTABLE_SV(ptr_table_fetch(PL_ptr_table, ssv));
14607 if (dsv)
14608 return dsv;
14609
14610 if(param->flags & CLONEf_JOIN_IN) {
14611 /** We are joining here so we don't want do clone
14612 something that is bad **/
14613 if (SvTYPE(ssv) == SVt_PVHV) {
14614 const HEK * const hvname = HvNAME_HEK(ssv);
14615 if (hvname) {
14616 /** don't clone stashes if they already exist **/
14617 dsv = MUTABLE_SV(gv_stashpvn(HEK_KEY(hvname), HEK_LEN(hvname),
14618 HEK_UTF8(hvname) ? SVf_UTF8 : 0));
14619 ptr_table_store(PL_ptr_table, ssv, dsv);
14620 return dsv;
14621 }
14622 }
14623 else if (SvTYPE(ssv) == SVt_PVGV && !SvFAKE(ssv)) {
14624 HV *stash = GvSTASH(ssv);
14625 const HEK * hvname;
14626 if (stash && (hvname = HvNAME_HEK(stash))) {
14627 /** don't clone GVs if they already exist **/
14628 SV **svp;
14629 stash = gv_stashpvn(HEK_KEY(hvname), HEK_LEN(hvname),
14630 HEK_UTF8(hvname) ? SVf_UTF8 : 0);
14631 svp = hv_fetch(
14632 stash, GvNAME(ssv),
14633 GvNAMEUTF8(ssv)
14634 ? -GvNAMELEN(ssv)
14635 : GvNAMELEN(ssv),
14636 0
14637 );
14638 if (svp && *svp && SvTYPE(*svp) == SVt_PVGV) {
14639 ptr_table_store(PL_ptr_table, ssv, *svp);
14640 return *svp;
14641 }
14642 }
14643 }
14644 }
14645
14646 /* create anew and remember what it is */
14647 new_SV(dsv);
14648
14649 #ifdef DEBUG_LEAKING_SCALARS
14650 dsv->sv_debug_optype = ssv->sv_debug_optype;
14651 dsv->sv_debug_line = ssv->sv_debug_line;
14652 dsv->sv_debug_inpad = ssv->sv_debug_inpad;
14653 dsv->sv_debug_parent = (SV*)ssv;
14654 FREE_SV_DEBUG_FILE(dsv);
14655 dsv->sv_debug_file = savesharedpv(ssv->sv_debug_file);
14656 #endif
14657
14658 ptr_table_store(PL_ptr_table, ssv, dsv);
14659
14660 /* clone */
14661 SvFLAGS(dsv) = SvFLAGS(ssv);
14662 SvFLAGS(dsv) &= ~SVf_OOK; /* don't propagate OOK hack */
14663 SvREFCNT(dsv) = 0; /* must be before any other dups! */
14664
14665 #ifdef DEBUGGING
14666 if (SvANY(ssv) && PL_watch_pvx && SvPVX_const(ssv) == PL_watch_pvx)
14667 PerlIO_printf(Perl_debug_log, "watch at %p hit, found string \"%s\"\n",
14668 (void*)PL_watch_pvx, SvPVX_const(ssv));
14669 #endif
14670
14671 /* don't clone objects whose class has asked us not to */
14672 if (SvOBJECT(ssv)
14673 && ! (SvFLAGS(SvSTASH(ssv)) & SVphv_CLONEABLE))
14674 {
14675 SvFLAGS(dsv) = 0;
14676 return dsv;
14677 }
14678
14679 switch (SvTYPE(ssv)) {
14680 case SVt_NULL:
14681 SvANY(dsv) = NULL;
14682 break;
14683 case SVt_IV:
14684 SET_SVANY_FOR_BODYLESS_IV(dsv);
14685 if(SvROK(ssv)) {
14686 Perl_rvpv_dup(aTHX_ dsv, ssv, param);
14687 } else {
14688 SvIV_set(dsv, SvIVX(ssv));
14689 }
14690 break;
14691 case SVt_NV:
14692 #if NVSIZE <= IVSIZE
14693 SET_SVANY_FOR_BODYLESS_NV(dsv);
14694 #else
14695 SvANY(dsv) = new_XNV();
14696 #endif
14697 SvNV_set(dsv, SvNVX(ssv));
14698 break;
14699 default:
14700 {
14701 /* These are all the types that need complex bodies allocating. */
14702 void *new_body;
14703 const svtype sv_type = SvTYPE(ssv);
14704 const struct body_details *sv_type_details
14705 = bodies_by_type + sv_type;
14706
14707 switch (sv_type) {
14708 default:
14709 Perl_croak(param->proto_perl, "Bizarre SvTYPE [%" IVdf "]", (IV)SvTYPE(ssv));
14710 NOT_REACHED; /* NOTREACHED */
14711 break;
14712
14713 case SVt_PVHV:
14714 if (HvHasAUX(ssv)) {
14715 sv_type_details = &fake_hv_with_aux;
14716 #ifdef PURIFY
14717 new_body = new_NOARENA(sv_type_details);
14718 #else
14719 new_body_from_arena(new_body, HVAUX_ARENA_ROOT_IX, fake_hv_with_aux);
14720 #endif
14721 goto have_body;
14722 }
14723 /* FALLTHROUGH */
14724 case SVt_PVOBJ:
14725 case SVt_PVGV:
14726 case SVt_PVIO:
14727 case SVt_PVFM:
14728 case SVt_PVAV:
14729 case SVt_PVCV:
14730 case SVt_PVLV:
14731 case SVt_REGEXP:
14732 case SVt_PVMG:
14733 case SVt_PVNV:
14734 case SVt_PVIV:
14735 case SVt_INVLIST:
14736 case SVt_PV:
14737 assert(sv_type_details->body_size);
14738 #ifndef PURIFY
14739 if (sv_type_details->arena) {
14740 new_body = S_new_body(aTHX_ sv_type);
14741 new_body
14742 = (void*)((char*)new_body - sv_type_details->offset);
14743 } else
14744 #endif
14745 {
14746 new_body = new_NOARENA(sv_type_details);
14747 }
14748 }
14749 have_body:
14750 assert(new_body);
14751 SvANY(dsv) = new_body;
14752
14753 #ifndef PURIFY
14754 Copy(((char*)SvANY(ssv)) + sv_type_details->offset,
14755 ((char*)SvANY(dsv)) + sv_type_details->offset,
14756 sv_type_details->copy, char);
14757 #else
14758 Copy(((char*)SvANY(ssv)),
14759 ((char*)SvANY(dsv)),
14760 sv_type_details->body_size + sv_type_details->offset, char);
14761 #endif
14762
14763 if (sv_type != SVt_PVAV && sv_type != SVt_PVHV && sv_type != SVt_PVOBJ
14764 && !isGV_with_GP(dsv)
14765 && !isREGEXP(dsv)
14766 && !(sv_type == SVt_PVIO && !(IoFLAGS(dsv) & IOf_FAKE_DIRP)))
14767 Perl_rvpv_dup(aTHX_ dsv, ssv, param);
14768
14769 /* The Copy above means that all the source (unduplicated) pointers
14770 are now in the destination. We can check the flags and the
14771 pointers in either, but it's possible that there's less cache
14772 missing by always going for the destination.
14773 FIXME - instrument and check that assumption */
14774 if (sv_type >= SVt_PVMG) {
14775 if (SvMAGIC(dsv))
14776 SvMAGIC_set(dsv, mg_dup(SvMAGIC(dsv), param));
14777 if (SvOBJECT(dsv) && SvSTASH(dsv))
14778 SvSTASH_set(dsv, hv_dup_inc(SvSTASH(dsv), param));
14779 else SvSTASH_set(dsv, 0); /* don't copy DESTROY cache */
14780 }
14781
14782 /* The cast silences a GCC warning about unhandled types. */
14783 switch ((int)sv_type) {
14784 case SVt_PV:
14785 break;
14786 case SVt_PVIV:
14787 break;
14788 case SVt_PVNV:
14789 break;
14790 case SVt_PVMG:
14791 break;
14792 case SVt_REGEXP:
14793 duprex:
14794 /* FIXME for plugins */
14795 re_dup_guts((REGEXP*) ssv, (REGEXP*) dsv, param);
14796 break;
14797 case SVt_PVLV:
14798 /* XXX LvTARGOFF sometimes holds PMOP* when DEBUGGING */
14799 if (LvTYPE(dsv) == 't') /* for tie: unrefcnted fake (SV**) */
14800 LvTARG(dsv) = dsv;
14801 else if (LvTYPE(dsv) == 'T') /* for tie: fake HE */
14802 LvTARG(dsv) = MUTABLE_SV(he_dup((HE*)LvTARG(dsv), FALSE, param));
14803 else
14804 LvTARG(dsv) = sv_dup_inc(LvTARG(dsv), param);
14805 if (isREGEXP(ssv)) goto duprex;
14806 /* FALLTHROUGH */
14807 case SVt_PVGV:
14808 /* non-GP case already handled above */
14809 if(isGV_with_GP(ssv)) {
14810 GvNAME_HEK(dsv) = hek_dup(GvNAME_HEK(dsv), param);
14811 /* Don't call sv_add_backref here as it's going to be
14812 created as part of the magic cloning of the symbol
14813 table--unless this is during a join and the stash
14814 is not actually being cloned. */
14815 /* Danger Will Robinson - GvGP(dsv) isn't initialised
14816 at the point of this comment. */
14817 GvSTASH(dsv) = hv_dup(GvSTASH(dsv), param);
14818 if (param->flags & CLONEf_JOIN_IN)
14819 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dsv)), dsv);
14820 GvGP_set(dsv, gp_dup(GvGP(ssv), param));
14821 (void)GpREFCNT_inc(GvGP(dsv));
14822 }
14823 break;
14824 case SVt_PVIO:
14825 /* PL_parser->rsfp_filters entries have fake IoDIRP() */
14826 if(IoFLAGS(dsv) & IOf_FAKE_DIRP) {
14827 /* I have no idea why fake dirp (rsfps)
14828 should be treated differently but otherwise
14829 we end up with leaks -- sky*/
14830 IoTOP_GV(dsv) = gv_dup_inc(IoTOP_GV(dsv), param);
14831 IoFMT_GV(dsv) = gv_dup_inc(IoFMT_GV(dsv), param);
14832 IoBOTTOM_GV(dsv) = gv_dup_inc(IoBOTTOM_GV(dsv), param);
14833 } else {
14834 IoTOP_GV(dsv) = gv_dup(IoTOP_GV(dsv), param);
14835 IoFMT_GV(dsv) = gv_dup(IoFMT_GV(dsv), param);
14836 IoBOTTOM_GV(dsv) = gv_dup(IoBOTTOM_GV(dsv), param);
14837 if (IoDIRP(dsv)) {
14838 IoDIRP(dsv) = dirp_dup(IoDIRP(dsv), param);
14839 } else {
14840 NOOP;
14841 /* IoDIRP(dsv) is already a copy of IoDIRP(ssv) */
14842 }
14843 IoIFP(dsv) = fp_dup(IoIFP(ssv), IoTYPE(dsv), param);
14844 }
14845 if (IoOFP(dsv) == IoIFP(ssv))
14846 IoOFP(dsv) = IoIFP(dsv);
14847 else
14848 IoOFP(dsv) = fp_dup(IoOFP(dsv), IoTYPE(dsv), param);
14849 IoTOP_NAME(dsv) = SAVEPV(IoTOP_NAME(dsv));
14850 IoFMT_NAME(dsv) = SAVEPV(IoFMT_NAME(dsv));
14851 IoBOTTOM_NAME(dsv) = SAVEPV(IoBOTTOM_NAME(dsv));
14852 break;
14853 case SVt_PVAV:
14854 /* avoid cloning an empty array */
14855 if (AvARRAY((const AV *)ssv) && AvFILLp((const AV *)ssv) >= 0) {
14856 SV **dst_ary, **src_ary;
14857 SSize_t items = AvFILLp((const AV *)ssv) + 1;
14858
14859 src_ary = AvARRAY((const AV *)ssv);
14860 Newx(dst_ary, AvMAX((const AV *)ssv)+1, SV*);
14861 ptr_table_store(PL_ptr_table, src_ary, dst_ary);
14862 AvARRAY(MUTABLE_AV(dsv)) = dst_ary;
14863 AvALLOC((const AV *)dsv) = dst_ary;
14864 if (AvREAL((const AV *)ssv)) {
14865 dst_ary = sv_dup_inc_multiple(src_ary, dst_ary, items,
14866 param);
14867 }
14868 else {
14869 while (items-- > 0)
14870 *dst_ary++ = sv_dup(*src_ary++, param);
14871 }
14872 items = AvMAX((const AV *)ssv) - AvFILLp((const AV *)ssv);
14873 while (items-- > 0) {
14874 *dst_ary++ = NULL;
14875 }
14876 }
14877 else {
14878 AvARRAY(MUTABLE_AV(dsv)) = NULL;
14879 AvALLOC((const AV *)dsv) = (SV**)NULL;
14880 AvMAX( (const AV *)dsv) = -1;
14881 AvFILLp((const AV *)dsv) = -1;
14882 }
14883 break;
14884 case SVt_PVHV:
14885 if (HvARRAY((const HV *)ssv)) {
14886 STRLEN i = 0;
14887 XPVHV * const dxhv = (XPVHV*)SvANY(dsv);
14888 XPVHV * const sxhv = (XPVHV*)SvANY(ssv);
14889 char *darray;
14890 Newx(darray, PERL_HV_ARRAY_ALLOC_BYTES(dxhv->xhv_max+1),
14891 char);
14892 HvARRAY(dsv) = (HE**)darray;
14893 while (i <= sxhv->xhv_max) {
14894 const HE * const source = HvARRAY(ssv)[i];
14895 HvARRAY(dsv)[i] = source
14896 ? he_dup(source, FALSE, param) : 0;
14897 ++i;
14898 }
14899 if (HvHasAUX(ssv))
14900 sv_dup_hvaux(ssv, dsv, param);
14901 }
14902 else
14903 HvARRAY(MUTABLE_HV(dsv)) = NULL;
14904 break;
14905 case SVt_PVCV:
14906 if (!(param->flags & CLONEf_COPY_STACKS)) {
14907 CvDEPTH(dsv) = 0;
14908 }
14909 /* FALLTHROUGH */
14910 case SVt_PVFM:
14911 /* NOTE: not refcounted */
14912 SvANY(MUTABLE_CV(dsv))->xcv_stash =
14913 hv_dup(CvSTASH(dsv), param);
14914 if ((param->flags & CLONEf_JOIN_IN) && CvSTASH(dsv))
14915 Perl_sv_add_backref(aTHX_ MUTABLE_SV(CvSTASH(dsv)), dsv);
14916 if (!CvISXSUB(dsv)) {
14917 OP_REFCNT_LOCK;
14918 CvROOT(dsv) = OpREFCNT_inc(CvROOT(dsv));
14919 OP_REFCNT_UNLOCK;
14920 CvSLABBED_off(dsv);
14921 } else if (CvCONST(dsv)) {
14922 CvXSUBANY(dsv).any_ptr =
14923 sv_dup_inc((const SV *)CvXSUBANY(dsv).any_ptr, param);
14924 } else if (CvREFCOUNTED_ANYSV(dsv)) {
14925 CvXSUBANY(dsv).any_sv =
14926 sv_dup_inc((const SV *)CvXSUBANY(dsv).any_sv, param);
14927 }
14928 assert(!CvSLABBED(dsv));
14929 if (CvDYNFILE(dsv)) CvFILE(dsv) = SAVEPV(CvFILE(dsv));
14930 if (CvNAMED(dsv))
14931 SvANY((CV *)dsv)->xcv_gv_u.xcv_hek =
14932 hek_dup(CvNAME_HEK((CV *)ssv), param);
14933 /* don't dup if copying back - CvGV isn't refcounted, so the
14934 * duped GV may never be freed. A bit of a hack! DAPM */
14935 else
14936 SvANY(MUTABLE_CV(dsv))->xcv_gv_u.xcv_gv =
14937 CvCVGV_RC(dsv)
14938 ? gv_dup_inc(CvGV(ssv), param)
14939 : (param->flags & CLONEf_JOIN_IN)
14940 ? NULL
14941 : gv_dup(CvGV(ssv), param);
14942
14943 if (!CvISXSUB(ssv)) {
14944 PADLIST * padlist = CvPADLIST(ssv);
14945 if(padlist)
14946 padlist = padlist_dup(padlist, param);
14947 CvPADLIST_set(dsv, padlist);
14948 } else
14949 /* unthreaded perl can't sv_dup so we don't support unthreaded's CvHSCXT */
14950 PoisonPADLIST(dsv);
14951
14952 CvOUTSIDE(dsv) =
14953 CvWEAKOUTSIDE(ssv)
14954 ? cv_dup( CvOUTSIDE(dsv), param)
14955 : cv_dup_inc(CvOUTSIDE(dsv), param);
14956 break;
14957 case SVt_PVOBJ:
14958 {
14959 Size_t fieldcount = ObjectMAXFIELD(ssv) + 1;
14960
14961 Newx(ObjectFIELDS(dsv), fieldcount, SV *);
14962 ObjectMAXFIELD(dsv) = fieldcount - 1;
14963
14964 sv_dup_inc_multiple(ObjectFIELDS(ssv), ObjectFIELDS(dsv), fieldcount, param);
14965 }
14966 break;
14967 }
14968 }
14969 }
14970
14971 return dsv;
14972 }
14973
14974 SV *
14975 Perl_sv_dup_inc(pTHX_ const SV *const ssv, CLONE_PARAMS *const param)
14976 {
14977 PERL_ARGS_ASSERT_SV_DUP_INC;
14978 return ssv ? SvREFCNT_inc(sv_dup_common(ssv, param)) : NULL;
14979 }
14980
14981 SV *
14982 Perl_sv_dup(pTHX_ const SV *const ssv, CLONE_PARAMS *const param)
14983 {
14984 SV *dsv = ssv ? sv_dup_common(ssv, param) : NULL;
14985 PERL_ARGS_ASSERT_SV_DUP;
14986
14987 /* Track every SV that (at least initially) had a reference count of 0.
14988 We need to do this by holding an actual reference to it in this array.
14989 If we attempt to cheat, turn AvREAL_off(), and store only pointers
14990 (akin to the stashes hash, and the perl stack), we come unstuck if
14991 a weak reference (or other SV legitimately SvREFCNT() == 0 for this
14992 thread) is manipulated in a CLONE method, because CLONE runs before the
14993 unreferenced array is walked to find SVs still with SvREFCNT() == 0
14994 (and fix things up by giving each a reference via the temps stack).
14995 Instead, during CLONE, if the 0-referenced SV has SvREFCNT_inc() and
14996 then SvREFCNT_dec(), it will be cleaned up (and added to the free list)
14997 before the walk of unreferenced happens and a reference to that is SV
14998 added to the temps stack. At which point we have the same SV considered
14999 to be in use, and free to be re-used. Not good.
15000 */
15001 if (dsv && !(param->flags & CLONEf_COPY_STACKS) && !SvREFCNT(dsv)) {
15002 assert(param->unreferenced);
15003 av_push(param->unreferenced, SvREFCNT_inc(dsv));
15004 }
15005
15006 return dsv;
15007 }
15008
15009 /* duplicate a context */
15010
15011 PERL_CONTEXT *
15012 Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param)
15013 {
15014 PERL_CONTEXT *ncxs;
15015
15016 PERL_ARGS_ASSERT_CX_DUP;
15017
15018 if (!cxs)
15019 return (PERL_CONTEXT*)NULL;
15020
15021 /* look for it in the table first */
15022 ncxs = (PERL_CONTEXT*)ptr_table_fetch(PL_ptr_table, cxs);
15023 if (ncxs)
15024 return ncxs;
15025
15026 /* create anew and remember what it is */
15027 Newx(ncxs, max + 1, PERL_CONTEXT);
15028 ptr_table_store(PL_ptr_table, cxs, ncxs);
15029 Copy(cxs, ncxs, max + 1, PERL_CONTEXT);
15030
15031 while (ix >= 0) {
15032 PERL_CONTEXT * const ncx = &ncxs[ix];
15033 if (CxTYPE(ncx) == CXt_SUBST) {
15034 Perl_croak(aTHX_ "Cloning substitution context is unimplemented");
15035 }
15036 else {
15037 ncx->blk_oldcop = (COP*)any_dup(ncx->blk_oldcop, param->proto_perl);
15038 switch (CxTYPE(ncx)) {
15039 case CXt_SUB:
15040 ncx->blk_sub.cv = cv_dup_inc(ncx->blk_sub.cv, param);
15041 if(CxHASARGS(ncx)){
15042 ncx->blk_sub.savearray = av_dup_inc(ncx->blk_sub.savearray,param);
15043 } else {
15044 ncx->blk_sub.savearray = NULL;
15045 }
15046 ncx->blk_sub.prevcomppad = (PAD*)ptr_table_fetch(PL_ptr_table,
15047 ncx->blk_sub.prevcomppad);
15048 break;
15049 case CXt_EVAL:
15050 ncx->blk_eval.old_namesv = sv_dup_inc(ncx->blk_eval.old_namesv,
15051 param);
15052 /* XXX should this sv_dup_inc? Or only if CxEVAL_TXT_REFCNTED ???? */
15053 ncx->blk_eval.cur_text = sv_dup(ncx->blk_eval.cur_text, param);
15054 ncx->blk_eval.cv = cv_dup(ncx->blk_eval.cv, param);
15055 /* XXX what to do with cur_top_env ???? */
15056 break;
15057 case CXt_LOOP_LAZYSV:
15058 ncx->blk_loop.state_u.lazysv.end
15059 = sv_dup_inc(ncx->blk_loop.state_u.lazysv.end, param);
15060 /* Fallthrough: duplicate lazysv.cur by using the ary.ary
15061 duplication code instead.
15062 We are taking advantage of (1) av_dup_inc and sv_dup_inc
15063 actually being the same function, and (2) order
15064 equivalence of the two unions.
15065 We can assert the later [but only at run time :-(] */
15066 assert ((void *) &ncx->blk_loop.state_u.ary.ary ==
15067 (void *) &ncx->blk_loop.state_u.lazysv.cur);
15068 /* FALLTHROUGH */
15069 case CXt_LOOP_ARY:
15070 ncx->blk_loop.state_u.ary.ary
15071 = av_dup_inc(ncx->blk_loop.state_u.ary.ary, param);
15072 /* FALLTHROUGH */
15073 case CXt_LOOP_LIST:
15074 case CXt_LOOP_LAZYIV:
15075 /* code common to all 'for' CXt_LOOP_* types */
15076 ncx->blk_loop.itersave =
15077 sv_dup_inc(ncx->blk_loop.itersave, param);
15078 if (CxPADLOOP(ncx)) {
15079 PADOFFSET off = ncx->blk_loop.itervar_u.svp
15080 - &CX_CURPAD_SV(ncx->blk_loop, 0);
15081 ncx->blk_loop.oldcomppad =
15082 (PAD*)ptr_table_fetch(PL_ptr_table,
15083 ncx->blk_loop.oldcomppad);
15084 ncx->blk_loop.itervar_u.svp =
15085 &CX_CURPAD_SV(ncx->blk_loop, off);
15086 }
15087 else {
15088 /* this copies the GV if CXp_FOR_GV, or the SV for an
15089 * alias (for \$x (...)) - relies on gv_dup being the
15090 * same as sv_dup */
15091 ncx->blk_loop.itervar_u.gv
15092 = gv_dup((const GV *)ncx->blk_loop.itervar_u.gv,
15093 param);
15094 }
15095 break;
15096 case CXt_LOOP_PLAIN:
15097 break;
15098 case CXt_FORMAT:
15099 ncx->blk_format.prevcomppad =
15100 (PAD*)ptr_table_fetch(PL_ptr_table,
15101 ncx->blk_format.prevcomppad);
15102 ncx->blk_format.cv = cv_dup_inc(ncx->blk_format.cv, param);
15103 ncx->blk_format.gv = gv_dup(ncx->blk_format.gv, param);
15104 ncx->blk_format.dfoutgv = gv_dup_inc(ncx->blk_format.dfoutgv,
15105 param);
15106 break;
15107 case CXt_GIVEN:
15108 ncx->blk_givwhen.defsv_save =
15109 sv_dup_inc(ncx->blk_givwhen.defsv_save, param);
15110 break;
15111 case CXt_BLOCK:
15112 case CXt_NULL:
15113 case CXt_WHEN:
15114 case CXt_DEFER:
15115 break;
15116 }
15117 }
15118 --ix;
15119 }
15120 return ncxs;
15121 }
15122
15123 /*
15124 =for apidoc si_dup
15125
15126 Duplicate a stack info structure, returning a pointer to the cloned object.
15127
15128 =cut
15129 */
15130
15131 PERL_SI *
15132 Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param)
15133 {
15134 PERL_SI *nsi;
15135
15136 PERL_ARGS_ASSERT_SI_DUP;
15137
15138 if (!si)
15139 return (PERL_SI*)NULL;
15140
15141 /* look for it in the table first */
15142 nsi = (PERL_SI*)ptr_table_fetch(PL_ptr_table, si);
15143 if (nsi)
15144 return nsi;
15145
15146 /* create anew and remember what it is */
15147 Newx(nsi, 1, PERL_SI);
15148 ptr_table_store(PL_ptr_table, si, nsi);
15149
15150 nsi->si_stack = av_dup_inc(si->si_stack, param);
15151 nsi->si_cxix = si->si_cxix;
15152 nsi->si_cxsubix = si->si_cxsubix;
15153 nsi->si_cxmax = si->si_cxmax;
15154 nsi->si_cxstack = cx_dup(si->si_cxstack, si->si_cxix, si->si_cxmax, param);
15155 nsi->si_type = si->si_type;
15156 nsi->si_prev = si_dup(si->si_prev, param);
15157 nsi->si_next = si_dup(si->si_next, param);
15158 nsi->si_markoff = si->si_markoff;
15159 #ifdef PERL_RC_STACK
15160 nsi->si_stack_nonrc_base = si->si_stack_nonrc_base;
15161 #endif
15162 #ifdef PERL_USE_HWM
15163 nsi->si_stack_hwm = 0;
15164 #endif
15165
15166 return nsi;
15167 }
15168
15169 #define POPINT(ss,ix) ((ss)[--(ix)].any_i32)
15170 #define TOPINT(ss,ix) ((ss)[ix].any_i32)
15171 #define POPLONG(ss,ix) ((ss)[--(ix)].any_long)
15172 #define TOPLONG(ss,ix) ((ss)[ix].any_long)
15173 #define POPIV(ss,ix) ((ss)[--(ix)].any_iv)
15174 #define TOPIV(ss,ix) ((ss)[ix].any_iv)
15175 #define POPUV(ss,ix) ((ss)[--(ix)].any_uv)
15176 #define TOPUV(ss,ix) ((ss)[ix].any_uv)
15177 #define POPBOOL(ss,ix) ((ss)[--(ix)].any_bool)
15178 #define TOPBOOL(ss,ix) ((ss)[ix].any_bool)
15179 #define POPPTR(ss,ix) ((ss)[--(ix)].any_ptr)
15180 #define TOPPTR(ss,ix) ((ss)[ix].any_ptr)
15181 #define POPDPTR(ss,ix) ((ss)[--(ix)].any_dptr)
15182 #define TOPDPTR(ss,ix) ((ss)[ix].any_dptr)
15183 #define POPDXPTR(ss,ix) ((ss)[--(ix)].any_dxptr)
15184 #define TOPDXPTR(ss,ix) ((ss)[ix].any_dxptr)
15185
15186 /* XXXXX todo */
15187 #define pv_dup_inc(p) SAVEPV(p)
15188 #define pv_dup(p) SAVEPV(p)
15189 #define svp_dup_inc(p,pp) any_dup(p,pp)
15190
15191 /* map any object to the new equivalent - either something in the
15192 * ptr table, or something in the interpreter structure
15193 */
15194
15195 void *
15196 Perl_any_dup(pTHX_ void *v, const PerlInterpreter *proto_perl)
15197 {
15198 void *ret;
15199
15200 PERL_ARGS_ASSERT_ANY_DUP;
15201
15202 if (!v)
15203 return (void*)NULL;
15204
15205 /* look for it in the table first */
15206 ret = ptr_table_fetch(PL_ptr_table, v);
15207 if (ret)
15208 return ret;
15209
15210 /* see if it is part of the interpreter structure */
15211 if (v >= (void*)proto_perl && v < (void*)(proto_perl+1))
15212 ret = (void*)(((char*)aTHX) + (((char*)v) - (char*)proto_perl));
15213 else {
15214 ret = v;
15215 }
15216
15217 return ret;
15218 }
15219
15220 /*
15221 =for apidoc ss_dup
15222
15223 Duplicate the save stack, returning a pointer to the cloned object.
15224
15225 =cut
15226 */
15227
15228 ANY *
15229 Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
15230 {
15231 ANY * const ss = proto_perl->Isavestack;
15232 const I32 max = proto_perl->Isavestack_max + SS_MAXPUSH;
15233 I32 ix = proto_perl->Isavestack_ix;
15234 ANY *nss;
15235 const SV *sv;
15236 const GV *gv;
15237 const AV *av;
15238 const HV *hv;
15239 char *pv; /* no const deliberately */
15240 void* ptr;
15241 int intval;
15242 long longval;
15243 GP *gp;
15244 IV iv;
15245 I32 i;
15246 char *c = NULL;
15247 void (*dptr) (void*);
15248 void (*dxptr) (pTHX_ void*);
15249
15250 PERL_ARGS_ASSERT_SS_DUP;
15251
15252 Newx(nss, max, ANY);
15253
15254 while (ix > 0) {
15255 const UV uv = POPUV(ss,ix);
15256 const U8 type = (U8)uv & SAVE_MASK;
15257
15258 TOPUV(nss,ix) = uv;
15259 switch (type) {
15260 case SAVEt_CLEARSV:
15261 case SAVEt_CLEARPADRANGE:
15262 break;
15263 case SAVEt_HELEM: /* hash element */
15264 case SAVEt_SV: /* scalar reference */
15265 sv = (const SV *)POPPTR(ss,ix);
15266 TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param));
15267 /* FALLTHROUGH */
15268 case SAVEt_ITEM: /* normal string */
15269 case SAVEt_GVSV: /* scalar slot in GV */
15270 sv = (const SV *)POPPTR(ss,ix);
15271 TOPPTR(nss,ix) = sv_dup_inc(sv, param);
15272 if (type == SAVEt_SV)
15273 break;
15274 /* FALLTHROUGH */
15275 case SAVEt_FREESV:
15276 case SAVEt_MORTALIZESV:
15277 case SAVEt_READONLY_OFF:
15278 sv = (const SV *)POPPTR(ss,ix);
15279 TOPPTR(nss,ix) = sv_dup_inc(sv, param);
15280 break;
15281 case SAVEt_FREEPADNAME:
15282 ptr = POPPTR(ss,ix);
15283 TOPPTR(nss,ix) = padname_dup((PADNAME *)ptr, param);
15284 PadnameREFCNT((PADNAME *)TOPPTR(nss,ix))++;
15285 break;
15286 case SAVEt_SHARED_PVREF: /* char* in shared space */
15287 c = (char*)POPPTR(ss,ix);
15288 TOPPTR(nss,ix) = savesharedpv(c);
15289 ptr = POPPTR(ss,ix);
15290 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15291 break;
15292 case SAVEt_GENERIC_SVREF: /* generic sv */
15293 case SAVEt_SVREF: /* scalar reference */
15294 sv = (const SV *)POPPTR(ss,ix);
15295 TOPPTR(nss,ix) = sv_dup_inc(sv, param);
15296 if (type == SAVEt_SVREF)
15297 SvREFCNT_inc_simple_void((SV *)TOPPTR(nss,ix));
15298 ptr = POPPTR(ss,ix);
15299 TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */
15300 /* this feels very strange, we have a **SV from one thread,
15301 * we copy the SV, but dont change the **SV. But in this thread
15302 * the target of the **SV could be something from the *other* thread.
15303 * So how can this possibly work correctly? */
15304 break;
15305 case SAVEt_RCPV:
15306 pv = (char *)POPPTR(ss,ix);
15307 TOPPTR(nss,ix) = rcpv_copy(pv);
15308 ptr = POPPTR(ss,ix);
15309 (void)rcpv_copy(*((char **)ptr));
15310 TOPPTR(nss,ix) = ptr;
15311 /* XXXXX: see comment above. */
15312 break;
15313 case SAVEt_GVSLOT: /* any slot in GV */
15314 sv = (const SV *)POPPTR(ss,ix);
15315 TOPPTR(nss,ix) = sv_dup_inc(sv, param);
15316 ptr = POPPTR(ss,ix);
15317 TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */
15318 sv = (const SV *)POPPTR(ss,ix);
15319 TOPPTR(nss,ix) = sv_dup_inc(sv, param);
15320 break;
15321 case SAVEt_HV: /* hash reference */
15322 case SAVEt_AV: /* array reference */
15323 sv = (const SV *) POPPTR(ss,ix);
15324 TOPPTR(nss,ix) = sv_dup_inc(sv, param);
15325 /* FALLTHROUGH */
15326 case SAVEt_COMPPAD:
15327 case SAVEt_NSTAB:
15328 sv = (const SV *) POPPTR(ss,ix);
15329 TOPPTR(nss,ix) = sv_dup(sv, param);
15330 break;
15331 case SAVEt_INT: /* int reference */
15332 ptr = POPPTR(ss,ix);
15333 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15334 intval = (int)POPINT(ss,ix);
15335 TOPINT(nss,ix) = intval;
15336 break;
15337 case SAVEt_LONG: /* long reference */
15338 ptr = POPPTR(ss,ix);
15339 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15340 longval = (long)POPLONG(ss,ix);
15341 TOPLONG(nss,ix) = longval;
15342 break;
15343 case SAVEt_I32: /* I32 reference */
15344 ptr = POPPTR(ss,ix);
15345 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15346 i = POPINT(ss,ix);
15347 TOPINT(nss,ix) = i;
15348 break;
15349 case SAVEt_IV: /* IV reference */
15350 case SAVEt_STRLEN: /* STRLEN/size_t ref */
15351 ptr = POPPTR(ss,ix);
15352 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15353 iv = POPIV(ss,ix);
15354 TOPIV(nss,ix) = iv;
15355 break;
15356 case SAVEt_TMPSFLOOR:
15357 iv = POPIV(ss,ix);
15358 TOPIV(nss,ix) = iv;
15359 break;
15360 case SAVEt_HPTR: /* HV* reference */
15361 case SAVEt_APTR: /* AV* reference */
15362 case SAVEt_SPTR: /* SV* reference */
15363 ptr = POPPTR(ss,ix);
15364 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15365 sv = (const SV *)POPPTR(ss,ix);
15366 TOPPTR(nss,ix) = sv_dup(sv, param);
15367 break;
15368 case SAVEt_VPTR: /* random* reference */
15369 ptr = POPPTR(ss,ix);
15370 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15371 /* FALLTHROUGH */
15372 case SAVEt_STRLEN_SMALL:
15373 case SAVEt_INT_SMALL:
15374 case SAVEt_I32_SMALL:
15375 case SAVEt_I16: /* I16 reference */
15376 case SAVEt_I8: /* I8 reference */
15377 case SAVEt_BOOL:
15378 ptr = POPPTR(ss,ix);
15379 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15380 break;
15381 case SAVEt_GENERIC_PVREF: /* generic char* */
15382 case SAVEt_PPTR: /* char* reference */
15383 ptr = POPPTR(ss,ix);
15384 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15385 c = (char*)POPPTR(ss,ix);
15386 TOPPTR(nss,ix) = pv_dup(c);
15387 break;
15388 case SAVEt_GP: /* scalar reference */
15389 gp = (GP*)POPPTR(ss,ix);
15390 TOPPTR(nss,ix) = gp = gp_dup(gp, param);
15391 (void)GpREFCNT_inc(gp);
15392 gv = (const GV *)POPPTR(ss,ix);
15393 TOPPTR(nss,ix) = gv_dup_inc(gv, param);
15394 break;
15395 case SAVEt_FREEOP:
15396 ptr = POPPTR(ss,ix);
15397 if (ptr && (((OP*)ptr)->op_private & OPpREFCOUNTED)) {
15398 /* these are assumed to be refcounted properly */
15399 OP *o;
15400 switch (((OP*)ptr)->op_type) {
15401 case OP_LEAVESUB:
15402 case OP_LEAVESUBLV:
15403 case OP_LEAVEEVAL:
15404 case OP_LEAVE:
15405 case OP_SCOPE:
15406 case OP_LEAVEWRITE:
15407 TOPPTR(nss,ix) = ptr;
15408 o = (OP*)ptr;
15409 OP_REFCNT_LOCK;
15410 (void) OpREFCNT_inc(o);
15411 OP_REFCNT_UNLOCK;
15412 break;
15413 default:
15414 TOPPTR(nss,ix) = NULL;
15415 break;
15416 }
15417 }
15418 else
15419 TOPPTR(nss,ix) = NULL;
15420 break;
15421 case SAVEt_FREECOPHH:
15422 ptr = POPPTR(ss,ix);
15423 TOPPTR(nss,ix) = cophh_copy((COPHH *)ptr);
15424 break;
15425 case SAVEt_ADELETE:
15426 av = (const AV *)POPPTR(ss,ix);
15427 TOPPTR(nss,ix) = av_dup_inc(av, param);
15428 i = POPINT(ss,ix);
15429 TOPINT(nss,ix) = i;
15430 break;
15431 case SAVEt_DELETE:
15432 hv = (const HV *)POPPTR(ss,ix);
15433 TOPPTR(nss,ix) = hv_dup_inc(hv, param);
15434 i = POPINT(ss,ix);
15435 TOPINT(nss,ix) = i;
15436 /* FALLTHROUGH */
15437 case SAVEt_FREEPV:
15438 c = (char*)POPPTR(ss,ix);
15439 TOPPTR(nss,ix) = pv_dup_inc(c);
15440 break;
15441 case SAVEt_FREERCPV:
15442 c = (char *)POPPTR(ss,ix);
15443 TOPPTR(nss,ix) = rcpv_copy(c);
15444 break;
15445 case SAVEt_STACK_POS: /* Position on Perl stack */
15446 i = POPINT(ss,ix);
15447 TOPINT(nss,ix) = i;
15448 break;
15449 case SAVEt_DESTRUCTOR:
15450 ptr = POPPTR(ss,ix);
15451 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */
15452 dptr = POPDPTR(ss,ix);
15453 TOPDPTR(nss,ix) = DPTR2FPTR(void (*)(void*),
15454 any_dup(FPTR2DPTR(void *, dptr),
15455 proto_perl));
15456 break;
15457 case SAVEt_DESTRUCTOR_X:
15458 ptr = POPPTR(ss,ix);
15459 TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */
15460 dxptr = POPDXPTR(ss,ix);
15461 TOPDXPTR(nss,ix) = DPTR2FPTR(void (*)(pTHX_ void*),
15462 any_dup(FPTR2DPTR(void *, dxptr),
15463 proto_perl));
15464 break;
15465 case SAVEt_REGCONTEXT:
15466 case SAVEt_ALLOC:
15467 ix -= uv >> SAVE_TIGHT_SHIFT;
15468 break;
15469 case SAVEt_AELEM: /* array element */
15470 sv = (const SV *)POPPTR(ss,ix);
15471 TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param));
15472 iv = POPIV(ss,ix);
15473 TOPIV(nss,ix) = iv;
15474 av = (const AV *)POPPTR(ss,ix);
15475 TOPPTR(nss,ix) = av_dup_inc(av, param);
15476 break;
15477 case SAVEt_OP:
15478 ptr = POPPTR(ss,ix);
15479 TOPPTR(nss,ix) = ptr;
15480 break;
15481 case SAVEt_HINTS_HH:
15482 hv = (const HV *)POPPTR(ss,ix);
15483 TOPPTR(nss,ix) = hv_dup_inc(hv, param);
15484 /* FALLTHROUGH */
15485 case SAVEt_HINTS:
15486 ptr = POPPTR(ss,ix);
15487 ptr = cophh_copy((COPHH*)ptr);
15488 TOPPTR(nss,ix) = ptr;
15489 i = POPINT(ss,ix);
15490 TOPINT(nss,ix) = i;
15491 break;
15492 case SAVEt_PADSV_AND_MORTALIZE:
15493 longval = (long)POPLONG(ss,ix);
15494 TOPLONG(nss,ix) = longval;
15495 ptr = POPPTR(ss,ix);
15496 TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
15497 sv = (const SV *)POPPTR(ss,ix);
15498 TOPPTR(nss,ix) = sv_dup_inc(sv, param);
15499 break;
15500 case SAVEt_SET_SVFLAGS:
15501 i = POPINT(ss,ix);
15502 TOPINT(nss,ix) = i;
15503 i = POPINT(ss,ix);
15504 TOPINT(nss,ix) = i;
15505 sv = (const SV *)POPPTR(ss,ix);
15506 TOPPTR(nss,ix) = sv_dup(sv, param);
15507 break;
15508 case SAVEt_CURCOP_WARNINGS:
15509 /* FALLTHROUGH */
15510 case SAVEt_COMPILE_WARNINGS:
15511 ptr = POPPTR(ss,ix);
15512 TOPPTR(nss,ix) = DUP_WARNINGS((char*)ptr);
15513 break;
15514 case SAVEt_PARSER:
15515 ptr = POPPTR(ss,ix);
15516 TOPPTR(nss,ix) = parser_dup((const yy_parser*)ptr, param);
15517 break;
15518 default:
15519 Perl_croak(aTHX_
15520 "panic: ss_dup inconsistency (%" IVdf ")", (IV) type);
15521 }
15522 }
15523
15524 return nss;
15525 }
15526
15527
15528 /* if sv is a stash, call $class->CLONE_SKIP(), and set the SVphv_CLONEABLE
15529 * flag to the result. This is done for each stash before cloning starts,
15530 * so we know which stashes want their objects cloned */
15531
15532 static void
15533 do_mark_cloneable_stash(pTHX_ SV *const sv)
15534 {
15535 const HEK * const hvname = HvNAME_HEK((const HV *)sv);
15536 if (hvname) {
15537 GV* const cloner = gv_fetchmethod_autoload(MUTABLE_HV(sv), "CLONE_SKIP", 0);
15538 SvFLAGS(sv) |= SVphv_CLONEABLE; /* clone objects by default */
15539 if (cloner && GvCV(cloner)) {
15540 dSP;
15541 UV status;
15542
15543 ENTER;
15544 SAVETMPS;
15545 PUSHMARK(SP);
15546 mXPUSHs(newSVhek(hvname));
15547 PUTBACK;
15548 call_sv(MUTABLE_SV(GvCV(cloner)), G_SCALAR);
15549 SPAGAIN;
15550 status = POPu;
15551 PUTBACK;
15552 FREETMPS;
15553 LEAVE;
15554 if (status)
15555 SvFLAGS(sv) &= ~SVphv_CLONEABLE;
15556 }
15557 }
15558 }
15559
15560
15561
15562 /*
15563 =for apidoc perl_clone
15564
15565 Create and return a new interpreter by cloning the current one.
15566
15567 C<perl_clone> takes these flags as parameters:
15568
15569 C<CLONEf_COPY_STACKS> - is used to, well, copy the stacks also,
15570 without it we only clone the data and zero the stacks,
15571 with it we copy the stacks and the new perl interpreter is
15572 ready to run at the exact same point as the previous one.
15573 The pseudo-fork code uses C<COPY_STACKS> while the
15574 threads->create doesn't.
15575
15576 C<CLONEf_KEEP_PTR_TABLE> -
15577 C<perl_clone> keeps a ptr_table with the pointer of the old
15578 variable as a key and the new variable as a value,
15579 this allows it to check if something has been cloned and not
15580 clone it again, but rather just use the value and increase the
15581 refcount.
15582 If C<KEEP_PTR_TABLE> is not set then C<perl_clone> will kill the ptr_table
15583 using the function S<C<ptr_table_free(PL_ptr_table); PL_ptr_table = NULL;>>.
15584 A reason to keep it around is if you want to dup some of your own
15585 variables which are outside the graph that perl scans.
15586
15587 C<CLONEf_CLONE_HOST> -
15588 This is a win32 thing, it is ignored on unix, it tells perl's
15589 win32host code (which is c++) to clone itself, this is needed on
15590 win32 if you want to run two threads at the same time,
15591 if you just want to do some stuff in a separate perl interpreter
15592 and then throw it away and return to the original one,
15593 you don't need to do anything.
15594
15595 =cut
15596 */
15597
15598 /* XXX the above needs expanding by someone who actually understands it ! */
15599 EXTERN_C PerlInterpreter *
15600 perl_clone_host(PerlInterpreter* proto_perl, UV flags);
15601
15602 PerlInterpreter *
15603 perl_clone(PerlInterpreter *proto_perl, UV flags)
15604 {
15605 #ifdef PERL_IMPLICIT_SYS
15606
15607 PERL_ARGS_ASSERT_PERL_CLONE;
15608
15609 /* perlhost.h so we need to call into it
15610 to clone the host, CPerlHost should have a c interface, sky */
15611
15612 #ifndef __amigaos4__
15613 if (flags & CLONEf_CLONE_HOST) {
15614 return perl_clone_host(proto_perl,flags);
15615 }
15616 #endif
15617 return perl_clone_using(proto_perl, flags,
15618 proto_perl->IMem,
15619 proto_perl->IMemShared,
15620 proto_perl->IMemParse,
15621 proto_perl->IEnv,
15622 proto_perl->IStdIO,
15623 proto_perl->ILIO,
15624 proto_perl->IDir,
15625 proto_perl->ISock,
15626 proto_perl->IProc);
15627 }
15628
15629 PerlInterpreter *
15630 perl_clone_using(PerlInterpreter *proto_perl, UV flags,
15631 struct IPerlMem* ipM, struct IPerlMem* ipMS,
15632 struct IPerlMem* ipMP, struct IPerlEnv* ipE,
15633 struct IPerlStdIO* ipStd, struct IPerlLIO* ipLIO,
15634 struct IPerlDir* ipD, struct IPerlSock* ipS,
15635 struct IPerlProc* ipP)
15636 {
15637 /* XXX many of the string copies here can be optimized if they're
15638 * constants; they need to be allocated as common memory and just
15639 * their pointers copied. */
15640
15641 IV i;
15642 CLONE_PARAMS clone_params;
15643 CLONE_PARAMS* const param = &clone_params;
15644
15645 PerlInterpreter * const my_perl = (PerlInterpreter*)(*ipM->pMalloc)(ipM, sizeof(PerlInterpreter));
15646
15647 PERL_ARGS_ASSERT_PERL_CLONE_USING;
15648 #else /* !PERL_IMPLICIT_SYS */
15649 IV i;
15650 CLONE_PARAMS clone_params;
15651 CLONE_PARAMS* param = &clone_params;
15652 PerlInterpreter * const my_perl = (PerlInterpreter*)PerlMem_malloc(sizeof(PerlInterpreter));
15653
15654 PERL_ARGS_ASSERT_PERL_CLONE;
15655 #endif /* PERL_IMPLICIT_SYS */
15656
15657 /* for each stash, determine whether its objects should be cloned */
15658 S_visit(proto_perl, do_mark_cloneable_stash, SVt_PVHV, SVTYPEMASK);
15659 my_perl->Iphase = PERL_PHASE_CONSTRUCT;
15660 PERL_SET_THX(my_perl);
15661
15662 #ifdef DEBUGGING
15663 PoisonNew(my_perl, 1, PerlInterpreter);
15664 PL_op = NULL;
15665 PL_curcop = NULL;
15666 PL_defstash = NULL; /* may be used by perl malloc() */
15667 PL_markstack = 0;
15668 PL_scopestack = 0;
15669 PL_scopestack_name = 0;
15670 PL_savestack = 0;
15671 PL_savestack_ix = 0;
15672 PL_savestack_max = -1;
15673 PL_sig_pending = 0;
15674 PL_parser = NULL;
15675 PL_eval_begin_nest_depth = proto_perl->Ieval_begin_nest_depth;
15676 Zero(&PL_debug_pad, 1, struct perl_debug_pad);
15677 Zero(&PL_padname_undef, 1, PADNAME);
15678 Zero(&PL_padname_const, 1, PADNAME);
15679 # ifdef DEBUG_LEAKING_SCALARS
15680 PL_sv_serial = (((UV)my_perl >> 2) & 0xfff) * 1000000;
15681 # endif
15682 # ifdef PERL_TRACE_OPS
15683 Zero(PL_op_exec_cnt, OP_max+2, UV);
15684 # endif
15685 #else /* !DEBUGGING */
15686 Zero(my_perl, 1, PerlInterpreter);
15687 #endif /* DEBUGGING */
15688
15689 #ifdef PERL_IMPLICIT_SYS
15690 /* host pointers */
15691 PL_Mem = ipM;
15692 PL_MemShared = ipMS;
15693 PL_MemParse = ipMP;
15694 PL_Env = ipE;
15695 PL_StdIO = ipStd;
15696 PL_LIO = ipLIO;
15697 PL_Dir = ipD;
15698 PL_Sock = ipS;
15699 PL_Proc = ipP;
15700 #endif /* PERL_IMPLICIT_SYS */
15701
15702
15703 param->flags = flags;
15704 /* Nothing in the core code uses this, but we make it available to
15705 extensions (using mg_dup). */
15706 param->proto_perl = proto_perl;
15707 /* Likely nothing will use this, but it is initialised to be consistent
15708 with Perl_clone_params_new(). */
15709 param->new_perl = my_perl;
15710 param->unreferenced = NULL;
15711
15712
15713 INIT_TRACK_MEMPOOL(my_perl->Imemory_debug_header, my_perl);
15714
15715 PL_body_arenas = NULL;
15716 Zero(&PL_body_roots, 1, PL_body_roots);
15717
15718 PL_sv_count = 0;
15719 PL_sv_root = NULL;
15720 PL_sv_arenaroot = NULL;
15721
15722 PL_debug = proto_perl->Idebug;
15723
15724 /* dbargs array probably holds garbage */
15725 PL_dbargs = NULL;
15726
15727 PL_compiling = proto_perl->Icompiling;
15728
15729 /* pseudo environmental stuff */
15730 PL_origargc = proto_perl->Iorigargc;
15731 PL_origargv = proto_perl->Iorigargv;
15732
15733 #ifndef NO_TAINT_SUPPORT
15734 /* Set tainting stuff before PerlIO_debug can possibly get called */
15735 PL_tainting = proto_perl->Itainting;
15736 PL_taint_warn = proto_perl->Itaint_warn;
15737 #else
15738 PL_tainting = FALSE;
15739 PL_taint_warn = FALSE;
15740 #endif
15741
15742 PL_minus_c = proto_perl->Iminus_c;
15743
15744 PL_localpatches = proto_perl->Ilocalpatches;
15745 PL_splitstr = SAVEPV(proto_perl->Isplitstr);
15746 PL_minus_n = proto_perl->Iminus_n;
15747 PL_minus_p = proto_perl->Iminus_p;
15748 PL_minus_l = proto_perl->Iminus_l;
15749 PL_minus_a = proto_perl->Iminus_a;
15750 PL_minus_E = proto_perl->Iminus_E;
15751 PL_minus_F = proto_perl->Iminus_F;
15752 PL_doswitches = proto_perl->Idoswitches;
15753 PL_dowarn = proto_perl->Idowarn;
15754 #ifdef PERL_SAWAMPERSAND
15755 PL_sawampersand = proto_perl->Isawampersand;
15756 #endif
15757 PL_unsafe = proto_perl->Iunsafe;
15758 PL_perldb = proto_perl->Iperldb;
15759 PL_perl_destruct_level = proto_perl->Iperl_destruct_level;
15760 PL_exit_flags = proto_perl->Iexit_flags;
15761
15762 /* XXX time(&PL_basetime) when asked for? */
15763 PL_basetime = proto_perl->Ibasetime;
15764
15765 PL_maxsysfd = proto_perl->Imaxsysfd;
15766 PL_statusvalue = proto_perl->Istatusvalue;
15767 #ifdef __VMS
15768 PL_statusvalue_vms = proto_perl->Istatusvalue_vms;
15769 #else
15770 PL_statusvalue_posix = proto_perl->Istatusvalue_posix;
15771 #endif
15772
15773 /* RE engine related */
15774 PL_regmatch_slab = NULL;
15775 PL_reg_curpm = NULL;
15776
15777 PL_sub_generation = proto_perl->Isub_generation;
15778
15779 /* funky return mechanisms */
15780 PL_forkprocess = proto_perl->Iforkprocess;
15781
15782 /* internal state */
15783 PL_main_start = proto_perl->Imain_start;
15784 PL_eval_root = proto_perl->Ieval_root;
15785 PL_eval_start = proto_perl->Ieval_start;
15786
15787 PL_filemode = proto_perl->Ifilemode;
15788 PL_lastfd = proto_perl->Ilastfd;
15789 PL_oldname = proto_perl->Ioldname; /* XXX not quite right */
15790 PL_gensym = proto_perl->Igensym;
15791
15792 PL_laststatval = proto_perl->Ilaststatval;
15793 PL_laststype = proto_perl->Ilaststype;
15794 PL_mess_sv = NULL;
15795
15796 PL_profiledata = NULL;
15797
15798 PL_generation = proto_perl->Igeneration;
15799
15800 PL_in_clean_objs = proto_perl->Iin_clean_objs;
15801 PL_in_clean_all = proto_perl->Iin_clean_all;
15802
15803 PL_delaymagic_uid = proto_perl->Idelaymagic_uid;
15804 PL_delaymagic_euid = proto_perl->Idelaymagic_euid;
15805 PL_delaymagic_gid = proto_perl->Idelaymagic_gid;
15806 PL_delaymagic_egid = proto_perl->Idelaymagic_egid;
15807 PL_nomemok = proto_perl->Inomemok;
15808 PL_an = proto_perl->Ian;
15809 PL_evalseq = proto_perl->Ievalseq;
15810 PL_origalen = proto_perl->Iorigalen;
15811
15812 PL_sighandlerp = proto_perl->Isighandlerp;
15813 PL_sighandler1p = proto_perl->Isighandler1p;
15814 PL_sighandler3p = proto_perl->Isighandler3p;
15815
15816 PL_runops = proto_perl->Irunops;
15817
15818 PL_subline = proto_perl->Isubline;
15819
15820 PL_cv_has_eval = proto_perl->Icv_has_eval;
15821 /* Unicode features (see perlrun/-C) */
15822 PL_unicode = proto_perl->Iunicode;
15823
15824 /* Pre-5.8 signals control */
15825 PL_signals = proto_perl->Isignals;
15826
15827 /* times() ticks per second */
15828 PL_clocktick = proto_perl->Iclocktick;
15829
15830 /* Recursion stopper for PerlIO_find_layer */
15831 PL_in_load_module = proto_perl->Iin_load_module;
15832
15833 /* Not really needed/useful since the reenrant_retint is "volatile",
15834 * but do it for consistency's sake. */
15835 PL_reentrant_retint = proto_perl->Ireentrant_retint;
15836
15837 /* Hooks to shared SVs and locks. */
15838 PL_sharehook = proto_perl->Isharehook;
15839 PL_lockhook = proto_perl->Ilockhook;
15840 PL_unlockhook = proto_perl->Iunlockhook;
15841 PL_threadhook = proto_perl->Ithreadhook;
15842 PL_destroyhook = proto_perl->Idestroyhook;
15843 PL_signalhook = proto_perl->Isignalhook;
15844
15845 PL_globhook = proto_perl->Iglobhook;
15846
15847 PL_srand_called = proto_perl->Isrand_called;
15848 Copy(&(proto_perl->Irandom_state), &PL_random_state, 1, PL_RANDOM_STATE_TYPE);
15849 PL_srand_override = proto_perl->Isrand_override;
15850 PL_srand_override_next = proto_perl->Isrand_override_next;
15851
15852 if (flags & CLONEf_COPY_STACKS) {
15853 /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */
15854 PL_tmps_ix = proto_perl->Itmps_ix;
15855 PL_tmps_max = proto_perl->Itmps_max;
15856 PL_tmps_floor = proto_perl->Itmps_floor;
15857
15858 /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix]
15859 * NOTE: unlike the others! */
15860 PL_scopestack_ix = proto_perl->Iscopestack_ix;
15861 PL_scopestack_max = proto_perl->Iscopestack_max;
15862
15863 /* next SSPUSHFOO() sets PL_savestack[PL_savestack_ix]
15864 * NOTE: unlike the others! */
15865 PL_savestack_ix = proto_perl->Isavestack_ix;
15866 PL_savestack_max = proto_perl->Isavestack_max;
15867 }
15868
15869 PL_start_env = proto_perl->Istart_env; /* XXXXXX */
15870 PL_top_env = &PL_start_env;
15871
15872 PL_op = proto_perl->Iop;
15873
15874 PL_Sv = NULL;
15875 PL_Xpv = (XPV*)NULL;
15876 my_perl->Ina = proto_perl->Ina;
15877
15878 PL_statcache = proto_perl->Istatcache;
15879
15880 #ifndef NO_TAINT_SUPPORT
15881 PL_tainted = proto_perl->Itainted;
15882 #else
15883 PL_tainted = FALSE;
15884 #endif
15885 PL_curpm = proto_perl->Icurpm; /* XXX No PMOP ref count */
15886
15887 PL_chopset = proto_perl->Ichopset; /* XXX never deallocated */
15888
15889 PL_restartjmpenv = proto_perl->Irestartjmpenv;
15890 PL_restartop = proto_perl->Irestartop;
15891 PL_in_eval = proto_perl->Iin_eval;
15892 PL_delaymagic = proto_perl->Idelaymagic;
15893 PL_phase = proto_perl->Iphase;
15894 PL_localizing = proto_perl->Ilocalizing;
15895
15896 PL_hv_fetch_ent_mh = NULL;
15897 PL_modcount = proto_perl->Imodcount;
15898 PL_lastgotoprobe = NULL;
15899 PL_dumpindent = proto_perl->Idumpindent;
15900
15901 PL_efloatbuf = NULL; /* reinits on demand */
15902 PL_efloatsize = 0; /* reinits on demand */
15903
15904 /* regex stuff */
15905
15906 PL_colorset = 0; /* reinits PL_colors[] */
15907 /*PL_colors[6] = {0,0,0,0,0,0};*/
15908
15909 /* Pluggable optimizer */
15910 PL_peepp = proto_perl->Ipeepp;
15911 PL_rpeepp = proto_perl->Irpeepp;
15912 /* op_free() hook */
15913 PL_opfreehook = proto_perl->Iopfreehook;
15914
15915 # ifdef PERL_MEM_LOG
15916 Zero(PL_mem_log, sizeof(PL_mem_log), char);
15917 # endif
15918
15919 #ifdef USE_REENTRANT_API
15920 /* XXX: things like -Dm will segfault here in perlio, but doing
15921 * PERL_SET_CONTEXT(proto_perl);
15922 * breaks too many other things
15923 */
15924 Perl_reentrant_init(aTHX);
15925 #endif
15926
15927 /* create SV map for pointer relocation */
15928 PL_ptr_table = ptr_table_new();
15929
15930 /* initialize these special pointers as early as possible */
15931 init_constants();
15932 ptr_table_store(PL_ptr_table, &proto_perl->Isv_undef, &PL_sv_undef);
15933 ptr_table_store(PL_ptr_table, &proto_perl->Isv_no, &PL_sv_no);
15934 ptr_table_store(PL_ptr_table, &proto_perl->Isv_zero, &PL_sv_zero);
15935 ptr_table_store(PL_ptr_table, &proto_perl->Isv_yes, &PL_sv_yes);
15936 ptr_table_store(PL_ptr_table, &proto_perl->Ipadname_const,
15937 &PL_padname_const);
15938
15939 /* create (a non-shared!) shared string table */
15940 PL_strtab = newHV();
15941 HvSHAREKEYS_off(PL_strtab);
15942 hv_ksplit(PL_strtab, HvTOTALKEYS(proto_perl->Istrtab));
15943 ptr_table_store(PL_ptr_table, proto_perl->Istrtab, PL_strtab);
15944
15945 Zero(PL_sv_consts, SV_CONSTS_COUNT, SV*);
15946
15947 PL_compiling.cop_file = rcpv_copy(proto_perl->Icompiling.cop_file);
15948
15949 ptr_table_store(PL_ptr_table, &proto_perl->Icompiling, &PL_compiling);
15950 PL_compiling.cop_warnings = DUP_WARNINGS(PL_compiling.cop_warnings);
15951 CopHINTHASH_set(&PL_compiling, cophh_copy(CopHINTHASH_get(&PL_compiling)));
15952 PL_curcop = (COP*)any_dup(proto_perl->Icurcop, proto_perl);
15953
15954 param->stashes = newAV(); /* Setup array of objects to call clone on */
15955 /* This makes no difference to the implementation, as it always pushes
15956 and shifts pointers to other SVs without changing their reference
15957 count, with the array becoming empty before it is freed. However, it
15958 makes it conceptually clear what is going on, and will avoid some
15959 work inside av.c, filling slots between AvFILL() and AvMAX() with
15960 &PL_sv_undef, and SvREFCNT_dec()ing those. */
15961 AvREAL_off(param->stashes);
15962
15963 if (!(flags & CLONEf_COPY_STACKS)) {
15964 param->unreferenced = newAV();
15965 }
15966
15967 #ifdef PERLIO_LAYERS
15968 /* Clone PerlIO tables as soon as we can handle general xx_dup() */
15969 PerlIO_clone(aTHX_ proto_perl, param);
15970 #endif
15971
15972 PL_envgv = gv_dup_inc(proto_perl->Ienvgv, param);
15973 PL_incgv = gv_dup_inc(proto_perl->Iincgv, param);
15974 PL_hintgv = gv_dup_inc(proto_perl->Ihintgv, param);
15975 PL_origfilename = SAVEPV(proto_perl->Iorigfilename);
15976 PL_xsubfilename = proto_perl->Ixsubfilename;
15977 PL_diehook = sv_dup_inc(proto_perl->Idiehook, param);
15978 PL_warnhook = sv_dup_inc(proto_perl->Iwarnhook, param);
15979
15980 PL_hook__require__before = sv_dup_inc(proto_perl->Ihook__require__before, param);
15981 PL_hook__require__after = sv_dup_inc(proto_perl->Ihook__require__after, param);
15982
15983 /* switches */
15984 PL_patchlevel = sv_dup_inc(proto_perl->Ipatchlevel, param);
15985 PL_inplace = SAVEPV(proto_perl->Iinplace);
15986 PL_e_script = sv_dup_inc(proto_perl->Ie_script, param);
15987
15988 /* magical thingies */
15989
15990 SvPVCLEAR(PERL_DEBUG_PAD(0)); /* For regex debugging. */
15991 SvPVCLEAR(PERL_DEBUG_PAD(1)); /* ext/re needs these */
15992 SvPVCLEAR(PERL_DEBUG_PAD(2)); /* even without DEBUGGING. */
15993
15994
15995 /* Clone the regex array */
15996 /* ORANGE FIXME for plugins, probably in the SV dup code.
15997 newSViv(PTR2IV(CALLREGDUPE(
15998 INT2PTR(REGEXP *, SvIVX(regex)), param))))
15999 */
16000 PL_regex_padav = av_dup_inc(proto_perl->Iregex_padav, param);
16001 PL_regex_pad = AvARRAY(PL_regex_padav);
16002
16003 PL_stashpadmax = proto_perl->Istashpadmax;
16004 PL_stashpadix = proto_perl->Istashpadix ;
16005 Newx(PL_stashpad, PL_stashpadmax, HV *);
16006 {
16007 PADOFFSET o = 0;
16008 for (; o < PL_stashpadmax; ++o)
16009 PL_stashpad[o] = hv_dup(proto_perl->Istashpad[o], param);
16010 }
16011
16012 /* shortcuts to various I/O objects */
16013 PL_ofsgv = gv_dup_inc(proto_perl->Iofsgv, param);
16014 PL_stdingv = gv_dup(proto_perl->Istdingv, param);
16015 PL_stderrgv = gv_dup(proto_perl->Istderrgv, param);
16016 PL_defgv = gv_dup(proto_perl->Idefgv, param);
16017 PL_argvgv = gv_dup_inc(proto_perl->Iargvgv, param);
16018 PL_argvoutgv = gv_dup(proto_perl->Iargvoutgv, param);
16019 PL_argvout_stack = av_dup_inc(proto_perl->Iargvout_stack, param);
16020
16021 /* shortcuts to regexp stuff */
16022 PL_replgv = gv_dup_inc(proto_perl->Ireplgv, param);
16023
16024 /* shortcuts to misc objects */
16025 PL_errgv = gv_dup(proto_perl->Ierrgv, param);
16026
16027 /* shortcuts to debugging objects */
16028 PL_DBgv = gv_dup_inc(proto_perl->IDBgv, param);
16029 PL_DBline = gv_dup_inc(proto_perl->IDBline, param);
16030 PL_DBsub = gv_dup_inc(proto_perl->IDBsub, param);
16031 PL_DBsingle = sv_dup(proto_perl->IDBsingle, param);
16032 PL_DBtrace = sv_dup(proto_perl->IDBtrace, param);
16033 PL_DBsignal = sv_dup(proto_perl->IDBsignal, param);
16034 Copy(proto_perl->IDBcontrol, PL_DBcontrol, DBVARMG_COUNT, IV);
16035
16036 /* symbol tables */
16037 PL_defstash = hv_dup_inc(proto_perl->Idefstash, param);
16038 PL_curstash = hv_dup_inc(proto_perl->Icurstash, param);
16039 PL_debstash = hv_dup(proto_perl->Idebstash, param);
16040 PL_globalstash = hv_dup(proto_perl->Iglobalstash, param);
16041 PL_curstname = sv_dup_inc(proto_perl->Icurstname, param);
16042
16043 PL_beginav = av_dup_inc(proto_perl->Ibeginav, param);
16044 PL_beginav_save = av_dup_inc(proto_perl->Ibeginav_save, param);
16045 PL_checkav_save = av_dup_inc(proto_perl->Icheckav_save, param);
16046 PL_unitcheckav = av_dup_inc(proto_perl->Iunitcheckav, param);
16047 PL_unitcheckav_save = av_dup_inc(proto_perl->Iunitcheckav_save, param);
16048 PL_endav = av_dup_inc(proto_perl->Iendav, param);
16049 PL_checkav = av_dup_inc(proto_perl->Icheckav, param);
16050 PL_initav = av_dup_inc(proto_perl->Iinitav, param);
16051 PL_savebegin = proto_perl->Isavebegin;
16052
16053 PL_isarev = hv_dup_inc(proto_perl->Iisarev, param);
16054
16055 /* subprocess state */
16056 PL_fdpid = av_dup_inc(proto_perl->Ifdpid, param);
16057
16058 if (proto_perl->Iop_mask)
16059 PL_op_mask = SAVEPVN(proto_perl->Iop_mask, PL_maxo);
16060 else
16061 PL_op_mask = NULL;
16062 /* PL_asserting = proto_perl->Iasserting; */
16063
16064 /* current interpreter roots */
16065 PL_main_cv = cv_dup_inc(proto_perl->Imain_cv, param);
16066 OP_REFCNT_LOCK;
16067 PL_main_root = OpREFCNT_inc(proto_perl->Imain_root);
16068 OP_REFCNT_UNLOCK;
16069
16070 /* runtime control stuff */
16071 PL_curcopdb = (COP*)any_dup(proto_perl->Icurcopdb, proto_perl);
16072
16073 PL_preambleav = av_dup_inc(proto_perl->Ipreambleav, param);
16074
16075 PL_ors_sv = sv_dup_inc(proto_perl->Iors_sv, param);
16076
16077 /* interpreter atexit processing */
16078 PL_exitlistlen = proto_perl->Iexitlistlen;
16079 if (PL_exitlistlen) {
16080 Newx(PL_exitlist, PL_exitlistlen, PerlExitListEntry);
16081 Copy(proto_perl->Iexitlist, PL_exitlist, PL_exitlistlen, PerlExitListEntry);
16082 }
16083 else
16084 PL_exitlist = (PerlExitListEntry*)NULL;
16085
16086 PL_my_cxt_size = proto_perl->Imy_cxt_size;
16087 if (PL_my_cxt_size) {
16088 Newx(PL_my_cxt_list, PL_my_cxt_size, void *);
16089 Copy(proto_perl->Imy_cxt_list, PL_my_cxt_list, PL_my_cxt_size, void *);
16090 }
16091 else {
16092 PL_my_cxt_list = (void**)NULL;
16093 }
16094 PL_modglobal = hv_dup_inc(proto_perl->Imodglobal, param);
16095 PL_custom_op_names = hv_dup_inc(proto_perl->Icustom_op_names,param);
16096 PL_custom_op_descs = hv_dup_inc(proto_perl->Icustom_op_descs,param);
16097 PL_custom_ops = hv_dup_inc(proto_perl->Icustom_ops, param);
16098
16099 PL_compcv = cv_dup(proto_perl->Icompcv, param);
16100
16101 PAD_CLONE_VARS(proto_perl, param);
16102
16103 #ifdef HAVE_INTERP_INTERN
16104 sys_intern_dup(&proto_perl->Isys_intern, &PL_sys_intern);
16105 #endif
16106
16107 PL_DBcv = cv_dup(proto_perl->IDBcv, param);
16108
16109 #ifdef PERL_USES_PL_PIDSTATUS
16110 PL_pidstatus = newHV(); /* XXX flag for cloning? */
16111 #endif
16112 PL_osname = SAVEPV(proto_perl->Iosname);
16113 PL_parser = parser_dup(proto_perl->Iparser, param);
16114
16115 /* XXX this only works if the saved cop has already been cloned */
16116 if (proto_perl->Iparser) {
16117 PL_parser->saved_curcop = (COP*)any_dup(
16118 proto_perl->Iparser->saved_curcop,
16119 proto_perl);
16120 }
16121
16122 PL_subname = sv_dup_inc(proto_perl->Isubname, param);
16123
16124 #ifdef USE_PL_CURLOCALES
16125 for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) {
16126 PL_curlocales[i] = SAVEPV("C");
16127 }
16128 #endif
16129 #ifdef USE_PL_CUR_LC_ALL
16130 PL_cur_LC_ALL = SAVEPV("C");
16131 #endif
16132 #ifdef USE_LOCALE_CTYPE
16133 Copy(PL_fold, PL_fold_locale, 256, U8);
16134
16135 /* Should we warn if uses locale? */
16136 PL_ctype_name = SAVEPV("C");
16137 PL_warn_locale = sv_dup_inc(proto_perl->Iwarn_locale, param);
16138 PL_in_utf8_CTYPE_locale = false;
16139 PL_in_utf8_turkic_locale = false;
16140 #endif
16141
16142 /* Did the locale setup indicate UTF-8? */
16143 PL_utf8locale = false;
16144
16145 #ifdef USE_LOCALE_COLLATE
16146 PL_in_utf8_COLLATE_locale = false;
16147 PL_collation_name = SAVEPV("C");
16148 PL_collation_ix = proto_perl->Icollation_ix;
16149 PL_collation_standard = true;
16150 PL_collxfrm_base = 0;
16151 PL_collxfrm_mult = 0;
16152 PL_strxfrm_max_cp = 0;
16153 PL_strxfrm_is_behaved = proto_perl->Istrxfrm_is_behaved;
16154 PL_strxfrm_NUL_replacement = '\0';
16155 #endif /* USE_LOCALE_COLLATE */
16156
16157 #ifdef USE_LOCALE_THREADS
16158 assert(PL_locale_mutex_depth <= 0);
16159 PL_locale_mutex_depth = 0;
16160 #endif
16161
16162 #ifdef USE_LOCALE_NUMERIC
16163 PL_numeric_name = SAVEPV("C");
16164 PL_numeric_radix_sv = newSVpvs(".");
16165 PL_underlying_radix_sv = newSVpvs(".");
16166 PL_numeric_standard = true;
16167 PL_numeric_underlying = true;
16168 PL_numeric_underlying_is_standard = true;
16169
16170 #endif /* !USE_LOCALE_NUMERIC */
16171 #if defined(USE_POSIX_2008_LOCALE)
16172 PL_scratch_locale_obj = NULL;
16173 PL_cur_locale_obj = PL_C_locale_obj;
16174 #endif
16175
16176 #ifdef HAS_MBRLEN
16177 PL_mbrlen_ps = proto_perl->Imbrlen_ps;
16178 #endif
16179 #ifdef HAS_MBRTOWC
16180 PL_mbrtowc_ps = proto_perl->Imbrtowc_ps;
16181 #endif
16182 #ifdef HAS_WCRTOMB
16183 PL_wcrtomb_ps = proto_perl->Iwcrtomb_ps;
16184 #endif
16185
16186 PL_langinfo_sv = newSVpvs("");
16187 PL_scratch_langinfo = newSVpvs("");
16188
16189 PL_setlocale_buf = NULL;
16190 PL_setlocale_bufsize = 0;
16191
16192 #if defined(USE_LOCALE_THREADS) && ! defined(USE_THREAD_SAFE_LOCALE)
16193 PL_less_dicey_locale_buf = NULL;
16194 PL_less_dicey_locale_bufsize = 0;
16195 #endif
16196
16197 /* Unicode inversion lists */
16198
16199 PL_AboveLatin1 = sv_dup_inc(proto_perl->IAboveLatin1, param);
16200 PL_Assigned_invlist = sv_dup_inc(proto_perl->IAssigned_invlist, param);
16201 PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param);
16202 PL_HasMultiCharFold = sv_dup_inc(proto_perl->IHasMultiCharFold, param);
16203 PL_InMultiCharFold = sv_dup_inc(proto_perl->IInMultiCharFold, param);
16204 PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param);
16205 PL_LB_invlist = sv_dup_inc(proto_perl->ILB_invlist, param);
16206 PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param);
16207 PL_SCX_invlist = sv_dup_inc(proto_perl->ISCX_invlist, param);
16208 PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param);
16209 PL_in_some_fold = sv_dup_inc(proto_perl->Iin_some_fold, param);
16210 PL_utf8_foldclosures = sv_dup_inc(proto_perl->Iutf8_foldclosures, param);
16211 PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param);
16212 PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param);
16213 PL_utf8_perl_idcont = sv_dup_inc(proto_perl->Iutf8_perl_idcont, param);
16214 PL_utf8_perl_idstart = sv_dup_inc(proto_perl->Iutf8_perl_idstart, param);
16215 PL_utf8_xidcont = sv_dup_inc(proto_perl->Iutf8_xidcont, param);
16216 PL_utf8_xidstart = sv_dup_inc(proto_perl->Iutf8_xidstart, param);
16217 PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param);
16218 for (i = 0; i < POSIX_CC_COUNT; i++) {
16219 PL_XPosix_ptrs[i] = sv_dup_inc(proto_perl->IXPosix_ptrs[i], param);
16220 if (i != CC_CASED_ && i != CC_VERTSPACE_) {
16221 PL_Posix_ptrs[i] = sv_dup_inc(proto_perl->IPosix_ptrs[i], param);
16222 }
16223 }
16224 PL_Posix_ptrs[CC_CASED_] = PL_Posix_ptrs[CC_ALPHA_];
16225 PL_Posix_ptrs[CC_VERTSPACE_] = NULL;
16226
16227 PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param);
16228 PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param);
16229 PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param);
16230 PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param);
16231 PL_utf8_tosimplefold = sv_dup_inc(proto_perl->Iutf8_tosimplefold, param);
16232 PL_utf8_charname_begin = sv_dup_inc(proto_perl->Iutf8_charname_begin, param);
16233 PL_utf8_charname_continue = sv_dup_inc(proto_perl->Iutf8_charname_continue, param);
16234 PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param);
16235 PL_InBitmap = sv_dup_inc(proto_perl->IInBitmap, param);
16236 PL_CCC_non0_non230 = sv_dup_inc(proto_perl->ICCC_non0_non230, param);
16237 PL_Private_Use = sv_dup_inc(proto_perl->IPrivate_Use, param);
16238
16239 #if 0
16240 PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param);
16241 #endif
16242
16243 if (proto_perl->Ipsig_pend) {
16244 Newxz(PL_psig_pend, SIG_SIZE, int);
16245 }
16246 else {
16247 PL_psig_pend = (int*)NULL;
16248 }
16249
16250 if (proto_perl->Ipsig_name) {
16251 Newx(PL_psig_name, 2 * SIG_SIZE, SV*);
16252 sv_dup_inc_multiple(proto_perl->Ipsig_name, PL_psig_name, 2 * SIG_SIZE,
16253 param);
16254 PL_psig_ptr = PL_psig_name + SIG_SIZE;
16255 }
16256 else {
16257 PL_psig_ptr = (SV**)NULL;
16258 PL_psig_name = (SV**)NULL;
16259 }
16260
16261 if (flags & CLONEf_COPY_STACKS) {
16262 Newx(PL_tmps_stack, PL_tmps_max, SV*);
16263 sv_dup_inc_multiple(proto_perl->Itmps_stack, PL_tmps_stack,
16264 PL_tmps_ix+1, param);
16265
16266 /* next PUSHMARK() sets *(PL_markstack_ptr+1) */
16267 i = proto_perl->Imarkstack_max - proto_perl->Imarkstack;
16268 Newx(PL_markstack, i, Stack_off_t);
16269 PL_markstack_max = PL_markstack + (proto_perl->Imarkstack_max
16270 - proto_perl->Imarkstack);
16271 PL_markstack_ptr = PL_markstack + (proto_perl->Imarkstack_ptr
16272 - proto_perl->Imarkstack);
16273 Copy(proto_perl->Imarkstack, PL_markstack,
16274 PL_markstack_ptr - PL_markstack + 1, Stack_off_t);
16275
16276 /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix]
16277 * NOTE: unlike the others! */
16278 Newx(PL_scopestack, PL_scopestack_max, I32);
16279 Copy(proto_perl->Iscopestack, PL_scopestack, PL_scopestack_ix, I32);
16280
16281 #ifdef DEBUGGING
16282 Newx(PL_scopestack_name, PL_scopestack_max, const char *);
16283 Copy(proto_perl->Iscopestack_name, PL_scopestack_name, PL_scopestack_ix, const char *);
16284 #endif
16285 /* reset stack AV to correct length before its duped via
16286 * PL_curstackinfo */
16287 AvFILLp(proto_perl->Icurstack) =
16288 proto_perl->Istack_sp - proto_perl->Istack_base;
16289
16290 /* NOTE: si_dup() looks at PL_markstack */
16291 PL_curstackinfo = si_dup(proto_perl->Icurstackinfo, param);
16292
16293 /* PL_curstack = PL_curstackinfo->si_stack; */
16294 PL_curstack = av_dup(proto_perl->Icurstack, param);
16295 PL_mainstack = av_dup(proto_perl->Imainstack, param);
16296
16297 /* next PUSHs() etc. set *(PL_stack_sp+1) */
16298 PL_stack_base = AvARRAY(PL_curstack);
16299 PL_stack_sp = PL_stack_base + (proto_perl->Istack_sp
16300 - proto_perl->Istack_base);
16301 PL_stack_max = PL_stack_base + AvMAX(PL_curstack);
16302
16303 /*Newxz(PL_savestack, PL_savestack_max, ANY);*/
16304 PL_savestack = ss_dup(proto_perl, param);
16305 }
16306 else {
16307 init_stacks();
16308 ENTER; /* perl_destruct() wants to LEAVE; */
16309 }
16310
16311 PL_statgv = gv_dup(proto_perl->Istatgv, param);
16312 PL_statname = sv_dup_inc(proto_perl->Istatname, param);
16313
16314 PL_rs = sv_dup_inc(proto_perl->Irs, param);
16315 PL_last_in_gv = gv_dup(proto_perl->Ilast_in_gv, param);
16316 PL_defoutgv = gv_dup_inc(proto_perl->Idefoutgv, param);
16317 PL_toptarget = sv_dup_inc(proto_perl->Itoptarget, param);
16318 PL_bodytarget = sv_dup_inc(proto_perl->Ibodytarget, param);
16319 PL_formtarget = sv_dup(proto_perl->Iformtarget, param);
16320
16321 PL_errors = sv_dup_inc(proto_perl->Ierrors, param);
16322
16323 PL_sortcop = (OP*)any_dup(proto_perl->Isortcop, proto_perl);
16324 PL_firstgv = gv_dup_inc(proto_perl->Ifirstgv, param);
16325 PL_secondgv = gv_dup_inc(proto_perl->Isecondgv, param);
16326
16327 PL_stashcache = newHV();
16328
16329 PL_watchaddr = (char **) ptr_table_fetch(PL_ptr_table,
16330 proto_perl->Iwatchaddr);
16331 PL_watchok = PL_watchaddr ? * PL_watchaddr : NULL;
16332 if (PL_debug && PL_watchaddr) {
16333 PerlIO_printf(Perl_debug_log,
16334 "WATCHING: %" UVxf " cloned as %" UVxf " with value %" UVxf "\n",
16335 PTR2UV(proto_perl->Iwatchaddr), PTR2UV(PL_watchaddr),
16336 PTR2UV(PL_watchok));
16337 }
16338
16339 PL_registered_mros = hv_dup_inc(proto_perl->Iregistered_mros, param);
16340 PL_blockhooks = av_dup_inc(proto_perl->Iblockhooks, param);
16341
16342 /* Call the ->CLONE method, if it exists, for each of the stashes
16343 identified by sv_dup() above.
16344 */
16345 while(av_count(param->stashes) != 0) {
16346 HV* const stash = MUTABLE_HV(av_shift(param->stashes));
16347 GV* const cloner = gv_fetchmethod_autoload(stash, "CLONE", 0);
16348 if (cloner && GvCV(cloner)) {
16349 ENTER;
16350 SAVETMPS;
16351 PUSHMARK(PL_stack_sp);
16352 rpp_extend(1);
16353 SV *newsv = newSVhek(HvNAME_HEK(stash));
16354 *++PL_stack_sp = newsv;
16355 if (!rpp_stack_is_rc())
16356 sv_2mortal(newsv);
16357 call_sv(MUTABLE_SV(GvCV(cloner)), G_DISCARD);
16358 FREETMPS;
16359 LEAVE;
16360 }
16361 }
16362
16363 if (!(flags & CLONEf_KEEP_PTR_TABLE)) {
16364 ptr_table_free(PL_ptr_table);
16365 PL_ptr_table = NULL;
16366 }
16367
16368 if (!(flags & CLONEf_COPY_STACKS)) {
16369 unreferenced_to_tmp_stack(param->unreferenced);
16370 }
16371
16372 SvREFCNT_dec(param->stashes);
16373
16374 /* orphaned? eg threads->new inside BEGIN or use */
16375 if (PL_compcv && ! SvREFCNT(PL_compcv)) {
16376 SvREFCNT_inc_simple_void(PL_compcv);
16377 SAVEFREESV(PL_compcv);
16378 }
16379
16380 return my_perl;
16381 }
16382
16383 static void
16384 S_unreferenced_to_tmp_stack(pTHX_ AV *const unreferenced)
16385 {
16386 PERL_ARGS_ASSERT_UNREFERENCED_TO_TMP_STACK;
16387
16388 if (AvFILLp(unreferenced) > -1) {
16389 SV **svp = AvARRAY(unreferenced);
16390 SV **const last = svp + AvFILLp(unreferenced);
16391 SSize_t count = 0;
16392
16393 do {
16394 if (SvREFCNT(*svp) == 1)
16395 ++count;
16396 } while (++svp <= last);
16397
16398 EXTEND_MORTAL(count);
16399 svp = AvARRAY(unreferenced);
16400
16401 do {
16402 if (SvREFCNT(*svp) == 1) {
16403 /* Our reference is the only one to this SV. This means that
16404 in this thread, the scalar effectively has a 0 reference.
16405 That doesn't work (cleanup never happens), so donate our
16406 reference to it onto the save stack. */
16407 PL_tmps_stack[++PL_tmps_ix] = *svp;
16408 } else {
16409 /* As an optimisation, because we are already walking the
16410 entire array, instead of above doing either
16411 SvREFCNT_inc(*svp) or *svp = &PL_sv_undef, we can instead
16412 release our reference to the scalar, so that at the end of
16413 the array owns zero references to the scalars it happens to
16414 point to. We are effectively converting the array from
16415 AvREAL() on to AvREAL() off. This saves the av_clear()
16416 (triggered by the SvREFCNT_dec(unreferenced) below) from
16417 walking the array a second time. */
16418 SvREFCNT_dec(*svp);
16419 }
16420
16421 } while (++svp <= last);
16422 AvREAL_off(unreferenced);
16423 }
16424 SvREFCNT_dec_NN(unreferenced);
16425 }
16426
16427 void
16428 Perl_clone_params_del(CLONE_PARAMS *param)
16429 {
16430 PerlInterpreter *const was = PERL_GET_THX;
16431 PerlInterpreter *const to = param->new_perl;
16432 dTHXa(to);
16433
16434 PERL_ARGS_ASSERT_CLONE_PARAMS_DEL;
16435
16436 if (was != to) {
16437 PERL_SET_THX(to);
16438 }
16439
16440 SvREFCNT_dec(param->stashes);
16441 if (param->unreferenced)
16442 unreferenced_to_tmp_stack(param->unreferenced);
16443
16444 Safefree(param);
16445
16446 if (was != to) {
16447 PERL_SET_THX(was);
16448 }
16449 }
16450
16451 CLONE_PARAMS *
16452 Perl_clone_params_new(PerlInterpreter *const from, PerlInterpreter *const to)
16453 {
16454 /* Need to play this game, as newAV() can call safesysmalloc(), and that
16455 does a dTHX; to get the context from thread local storage.
16456 FIXME - under PERL_CORE Newx(), Safefree() and friends should expand to
16457 a version that passes in my_perl. */
16458 PerlInterpreter *const was = PERL_GET_THX;
16459 CLONE_PARAMS *param;
16460
16461 PERL_ARGS_ASSERT_CLONE_PARAMS_NEW;
16462
16463 if (was != to) {
16464 PERL_SET_THX(to);
16465 }
16466
16467 /* Given that we've set the context, we can do this unshared. */
16468 Newx(param, 1, CLONE_PARAMS);
16469
16470 param->flags = 0;
16471 param->proto_perl = from;
16472 param->new_perl = to;
16473 param->stashes = (AV *)Perl_newSV_type(to, SVt_PVAV);
16474 AvREAL_off(param->stashes);
16475 param->unreferenced = (AV *)Perl_newSV_type(to, SVt_PVAV);
16476
16477 if (was != to) {
16478 PERL_SET_THX(was);
16479 }
16480 return param;
16481 }
16482
16483 #endif /* USE_ITHREADS */
16484
16485 void
16486 Perl_init_constants(pTHX)
16487 {
16488
16489 SvREFCNT(&PL_sv_undef) = SvREFCNT_IMMORTAL;
16490 SvFLAGS(&PL_sv_undef) = SVf_READONLY|SVf_PROTECT|SVt_NULL;
16491 SvANY(&PL_sv_undef) = NULL;
16492
16493 SvANY(&PL_sv_no) = new_XPVNV();
16494 SvREFCNT(&PL_sv_no) = SvREFCNT_IMMORTAL;
16495 SvFLAGS(&PL_sv_no) = SVt_PVNV|SVf_READONLY|SVf_PROTECT
16496 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK
16497 |SVp_POK|SVf_POK|SVf_IsCOW|SVppv_STATIC;
16498
16499 SvANY(&PL_sv_yes) = new_XPVNV();
16500 SvREFCNT(&PL_sv_yes) = SvREFCNT_IMMORTAL;
16501 SvFLAGS(&PL_sv_yes) = SVt_PVNV|SVf_READONLY|SVf_PROTECT
16502 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK
16503 |SVp_POK|SVf_POK|SVf_IsCOW|SVppv_STATIC;
16504
16505 SvANY(&PL_sv_zero) = new_XPVNV();
16506 SvREFCNT(&PL_sv_zero) = SvREFCNT_IMMORTAL;
16507 SvFLAGS(&PL_sv_zero) = SVt_PVNV|SVf_READONLY|SVf_PROTECT
16508 |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK
16509 |SVp_POK|SVf_POK
16510 |SVs_PADTMP;
16511
16512 SvPV_set(&PL_sv_no, (char*)PL_No);
16513 SvCUR_set(&PL_sv_no, 0);
16514 SvLEN_set(&PL_sv_no, 0);
16515 SvIV_set(&PL_sv_no, 0);
16516 SvNV_set(&PL_sv_no, 0);
16517
16518 SvPV_set(&PL_sv_yes, (char*)PL_Yes);
16519 SvCUR_set(&PL_sv_yes, 1);
16520 SvLEN_set(&PL_sv_yes, 0);
16521 SvIV_set(&PL_sv_yes, 1);
16522 SvNV_set(&PL_sv_yes, 1);
16523
16524 SvPV_set(&PL_sv_zero, (char*)PL_Zero);
16525 SvCUR_set(&PL_sv_zero, 1);
16526 SvLEN_set(&PL_sv_zero, 0);
16527 SvIV_set(&PL_sv_zero, 0);
16528 SvNV_set(&PL_sv_zero, 0);
16529
16530 PadnamePV(&PL_padname_const) = (char *)PL_No;
16531
16532 assert(SvIMMORTAL_INTERP(&PL_sv_yes));
16533 assert(SvIMMORTAL_INTERP(&PL_sv_undef));
16534 assert(SvIMMORTAL_INTERP(&PL_sv_no));
16535 assert(SvIMMORTAL_INTERP(&PL_sv_zero));
16536
16537 assert(SvIMMORTAL(&PL_sv_yes));
16538 assert(SvIMMORTAL(&PL_sv_undef));
16539 assert(SvIMMORTAL(&PL_sv_no));
16540 assert(SvIMMORTAL(&PL_sv_zero));
16541
16542 assert( SvIMMORTAL_TRUE(&PL_sv_yes));
16543 assert(!SvIMMORTAL_TRUE(&PL_sv_undef));
16544 assert(!SvIMMORTAL_TRUE(&PL_sv_no));
16545 assert(!SvIMMORTAL_TRUE(&PL_sv_zero));
16546
16547 assert( SvTRUE_nomg_NN(&PL_sv_yes));
16548 assert(!SvTRUE_nomg_NN(&PL_sv_undef));
16549 assert(!SvTRUE_nomg_NN(&PL_sv_no));
16550 assert(!SvTRUE_nomg_NN(&PL_sv_zero));
16551 }
16552
16553 /*
16554 =for apidoc_section $unicode
16555
16556 =for apidoc sv_recode_to_utf8
16557
16558 C<encoding> is assumed to be an C<Encode> object, on entry the PV
16559 of C<sv> is assumed to be octets in that encoding, and C<sv>
16560 will be converted into Unicode (and UTF-8).
16561
16562 If C<sv> already is UTF-8 (or if it is not C<POK>), or if C<encoding>
16563 is not a reference, nothing is done to C<sv>. If C<encoding> is not
16564 an C<Encode::XS> Encoding object, bad things will happen.
16565 (See L<encoding> and L<Encode>.)
16566
16567 The PV of C<sv> is returned.
16568
16569 =cut */
16570
16571 char *
16572 Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding)
16573 {
16574 PERL_ARGS_ASSERT_SV_RECODE_TO_UTF8;
16575
16576 if (SvPOK(sv) && !SvUTF8(sv) && !IN_BYTES && SvROK(encoding)) {
16577 SV *uni;
16578 STRLEN len;
16579 const char *s;
16580 dSP;
16581 SV *nsv = sv;
16582 ENTER;
16583 PUSHSTACK;
16584 SAVETMPS;
16585 if (SvPADTMP(nsv)) {
16586 nsv = sv_newmortal();
16587 SvSetSV_nosteal(nsv, sv);
16588 }
16589 save_re_context();
16590 PUSHMARK(sp);
16591 EXTEND(SP, 3);
16592 PUSHs(encoding);
16593 PUSHs(nsv);
16594 /*
16595 NI-S 2002/07/09
16596 Passing sv_yes is wrong - it needs to be or'ed set of constants
16597 for Encode::XS, while UTf-8 decode (currently) assumes a true value means
16598 remove converted chars from source.
16599
16600 Both will default the value - let them.
16601
16602 XPUSHs(&PL_sv_yes);
16603 */
16604 PUTBACK;
16605 call_method("decode", G_SCALAR);
16606 SPAGAIN;
16607 uni = POPs;
16608 PUTBACK;
16609 s = SvPV_const(uni, len);
16610 if (s != SvPVX_const(sv)) {
16611 SvGROW(sv, len + 1);
16612 Move(s, SvPVX(sv), len + 1, char);
16613 SvCUR_set(sv, len);
16614 }
16615 FREETMPS;
16616 POPSTACK;
16617 LEAVE;
16618 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
16619 /* clear pos and any utf8 cache */
16620 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
16621 if (mg)
16622 mg->mg_len = -1;
16623 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
16624 magic_setutf8(sv,mg); /* clear UTF8 cache */
16625 }
16626 SvUTF8_on(sv);
16627 return SvPVX(sv);
16628 }
16629 return SvPOKp(sv) ? SvPVX(sv) : NULL;
16630 }
16631
16632 /*
16633 =for apidoc sv_cat_decode
16634
16635 C<encoding> is assumed to be an C<Encode> object, the PV of C<ssv> is
16636 assumed to be octets in that encoding and decoding the input starts
16637 from the position which S<C<(PV + *offset)>> pointed to. C<dsv> will be
16638 concatenated with the decoded UTF-8 string from C<ssv>. Decoding will terminate
16639 when the string C<tstr> appears in decoding output or the input ends on
16640 the PV of C<ssv>. The value which C<offset> points will be modified
16641 to the last input position on C<ssv>.
16642
16643 Returns TRUE if the terminator was found, else returns FALSE.
16644
16645 =cut */
16646
16647 bool
16648 Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding,
16649 SV *ssv, int *offset, char *tstr, int tlen)
16650 {
16651 bool ret = FALSE;
16652
16653 PERL_ARGS_ASSERT_SV_CAT_DECODE;
16654
16655 if (SvPOK(ssv) && SvPOK(dsv) && SvROK(encoding)) {
16656 SV *offsv;
16657 dSP;
16658 ENTER;
16659 SAVETMPS;
16660 save_re_context();
16661 PUSHMARK(sp);
16662 EXTEND(SP, 6);
16663 PUSHs(encoding);
16664 PUSHs(dsv);
16665 PUSHs(ssv);
16666 offsv = newSViv(*offset);
16667 mPUSHs(offsv);
16668 mPUSHp(tstr, tlen);
16669 PUTBACK;
16670 call_method("cat_decode", G_SCALAR);
16671 SPAGAIN;
16672 ret = SvTRUE(TOPs);
16673 *offset = SvIV(offsv);
16674 PUTBACK;
16675 FREETMPS;
16676 LEAVE;
16677 }
16678 else
16679 Perl_croak(aTHX_ "Invalid argument to sv_cat_decode");
16680 return ret;
16681
16682 }
16683
16684 /* ---------------------------------------------------------------------
16685 *
16686 * support functions for report_uninit()
16687 */
16688
16689 /* the maxiumum size of array or hash where we will scan looking
16690 * for the undefined element that triggered the warning */
16691
16692 #define FUV_MAX_SEARCH_SIZE 1000
16693
16694 /* Look for an entry in the hash whose value has the same SV as val;
16695 * If so, return a mortal copy of the key. */
16696
16697 STATIC SV*
16698 S_find_hash_subscript(pTHX_ const HV *const hv, const SV *const val)
16699 {
16700 HE **array;
16701 I32 i;
16702
16703 PERL_ARGS_ASSERT_FIND_HASH_SUBSCRIPT;
16704
16705 if (!hv || SvMAGICAL(hv) || !HvTOTALKEYS(hv) ||
16706 (HvTOTALKEYS(hv) > FUV_MAX_SEARCH_SIZE))
16707 return NULL;
16708
16709 if (val == &PL_sv_undef || val == &PL_sv_placeholder)
16710 return NULL;
16711
16712 array = HvARRAY(hv);
16713
16714 for (i=HvMAX(hv); i>=0; i--) {
16715 HE *entry;
16716 for (entry = array[i]; entry; entry = HeNEXT(entry)) {
16717 if (HeVAL(entry) == val)
16718 return newSVhek_mortal(HeKEY_hek(entry));
16719 }
16720 }
16721 return NULL;
16722 }
16723
16724 /* Look for an entry in the array whose value has the same SV as val;
16725 * If so, return the index, otherwise return -1. */
16726
16727 STATIC SSize_t
16728 S_find_array_subscript(pTHX_ const AV *const av, const SV *const val)
16729 {
16730 PERL_ARGS_ASSERT_FIND_ARRAY_SUBSCRIPT;
16731
16732 if (!av || SvMAGICAL(av) || !AvARRAY(av) ||
16733 (AvFILLp(av) > FUV_MAX_SEARCH_SIZE))
16734 return -1;
16735
16736 if (val != &PL_sv_undef) {
16737 SV ** const svp = AvARRAY(av);
16738 SSize_t i;
16739
16740 for (i=AvFILLp(av); i>=0; i--)
16741 if (svp[i] == val)
16742 return i;
16743 }
16744 return -1;
16745 }
16746
16747 /* varname(): return the name of a variable, optionally with a subscript.
16748 * If gv is non-zero, use the name of that global, along with gvtype (one
16749 * of "$", "@", "%"); otherwise use the name of the lexical at pad offset
16750 * targ. Depending on the value of the subscript_type flag, return:
16751 */
16752
16753 #define FUV_SUBSCRIPT_NONE 1 /* "@foo" */
16754 #define FUV_SUBSCRIPT_ARRAY 2 /* "$foo[aindex]" */
16755 #define FUV_SUBSCRIPT_HASH 3 /* "$foo{keyname}" */
16756 #define FUV_SUBSCRIPT_WITHIN 4 /* "within @foo" */
16757
16758 SV*
16759 Perl_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ,
16760 const SV *const keyname, SSize_t aindex, int subscript_type)
16761 {
16762
16763 SV * const name = sv_newmortal();
16764 if (gv && isGV(gv)) {
16765 char buffer[2];
16766 buffer[0] = gvtype;
16767 buffer[1] = 0;
16768
16769 /* as gv_fullname4(), but add literal '^' for $^FOO names */
16770
16771 gv_fullname4(name, gv, buffer, 0);
16772
16773 if ((unsigned int)SvPVX(name)[1] <= 26) {
16774 buffer[0] = '^';
16775 buffer[1] = SvPVX(name)[1] + 'A' - 1;
16776
16777 /* Swap the 1 unprintable control character for the 2 byte pretty
16778 version - ie substr($name, 1, 1) = $buffer; */
16779 sv_insert(name, 1, 1, buffer, 2);
16780 }
16781 }
16782 else {
16783 CV * const cv = gv ? ((CV *)gv) : find_runcv(NULL);
16784 PADNAME *sv;
16785
16786 assert(!cv || SvTYPE(cv) == SVt_PVCV || SvTYPE(cv) == SVt_PVFM);
16787
16788 if (!cv || !CvPADLIST(cv))
16789 return NULL;
16790 sv = padnamelist_fetch(PadlistNAMES(CvPADLIST(cv)), targ);
16791 sv_setpvn(name, PadnamePV(sv), PadnameLEN(sv));
16792 SvUTF8_on(name);
16793 }
16794
16795 if (subscript_type == FUV_SUBSCRIPT_HASH) {
16796 SV * const sv = newSV_type(SVt_NULL);
16797 STRLEN len;
16798 const char * const pv = SvPV_nomg_const((SV*)keyname, len);
16799
16800 *SvPVX(name) = '$';
16801 Perl_sv_catpvf(aTHX_ name, "{%s}",
16802 pv_pretty(sv, pv, len, 32, NULL, NULL,
16803 PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT ));
16804 SvREFCNT_dec_NN(sv);
16805 }
16806 else if (subscript_type == FUV_SUBSCRIPT_ARRAY) {
16807 *SvPVX(name) = '$';
16808 Perl_sv_catpvf(aTHX_ name, "[%" IVdf "]", (IV)aindex);
16809 }
16810 else if (subscript_type == FUV_SUBSCRIPT_WITHIN) {
16811 /* We know that name has no magic, so can use 0 instead of SV_GMAGIC */
16812 Perl_sv_insert_flags(aTHX_ name, 0, 0, STR_WITH_LEN("within "), 0);
16813 }
16814 else {
16815 assert(subscript_type == FUV_SUBSCRIPT_NONE);
16816 }
16817
16818 return name;
16819 }
16820
16821
16822 /*
16823 =apidoc_section $warning
16824 =for apidoc find_uninit_var
16825
16826 Find the name of the undefined variable (if any) that caused the operator
16827 to issue a "Use of uninitialized value" warning.
16828 If match is true, only return a name if its value matches C<uninit_sv>.
16829 So roughly speaking, if a unary operator (such as C<OP_COS>) generates a
16830 warning, then following the direct child of the op may yield an
16831 C<OP_PADSV> or C<OP_GV> that gives the name of the undefined variable. On the
16832 other hand, with C<OP_ADD> there are two branches to follow, so we only print
16833 the variable name if we get an exact match.
16834 C<desc_p> points to a string pointer holding the description of the op.
16835 This may be updated if needed.
16836
16837 The name is returned as a mortal SV.
16838
16839 Assumes that C<PL_op> is the OP that originally triggered the error, and that
16840 C<PL_comppad>/C<PL_curpad> points to the currently executing pad.
16841
16842 =cut
16843 */
16844
16845 STATIC SV *
16846 S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv,
16847 bool match, const char **desc_p)
16848 {
16849 SV *sv;
16850 const GV *gv;
16851 const OP *o, *o2, *kid;
16852
16853 PERL_ARGS_ASSERT_FIND_UNINIT_VAR;
16854
16855 if (!obase || (match && (!uninit_sv || uninit_sv == &PL_sv_undef ||
16856 uninit_sv == &PL_sv_placeholder)))
16857 return NULL;
16858
16859 switch (obase->op_type) {
16860
16861 case OP_UNDEF:
16862 /* the optimizer rewrites '$x = undef' to 'undef $x' for lexical
16863 * variables, which can occur as the source of warnings:
16864 * ($x = undef) =~ s/a/b/;
16865 * The OPpUNDEF_KEEP_PV flag indicates that this used to be an
16866 * assignment op.
16867 * Otherwise undef should not care if its args are undef - any warnings
16868 * will be from tied/magic vars */
16869 if (
16870 (obase->op_private & (OPpTARGET_MY | OPpUNDEF_KEEP_PV)) == (OPpTARGET_MY | OPpUNDEF_KEEP_PV)
16871 && (!match || PAD_SVl(obase->op_targ) == uninit_sv)
16872 ) {
16873 return varname(NULL, '$', obase->op_targ, NULL, 0, FUV_SUBSCRIPT_NONE);
16874 }
16875 break;
16876
16877 case OP_RV2AV:
16878 case OP_RV2HV:
16879 case OP_PADAV:
16880 case OP_PADHV:
16881 {
16882 const bool pad = ( obase->op_type == OP_PADAV
16883 || obase->op_type == OP_PADHV
16884 || obase->op_type == OP_PADRANGE
16885 );
16886
16887 const bool hash = ( obase->op_type == OP_PADHV
16888 || obase->op_type == OP_RV2HV
16889 || (obase->op_type == OP_PADRANGE
16890 && SvTYPE(PAD_SVl(obase->op_targ)) == SVt_PVHV)
16891 );
16892 SSize_t index = 0;
16893 SV *keysv = NULL;
16894 int subscript_type = FUV_SUBSCRIPT_WITHIN;
16895
16896 if (pad) { /* @lex, %lex */
16897 sv = PAD_SVl(obase->op_targ);
16898 gv = NULL;
16899 }
16900 else {
16901 if (cUNOPx(obase)->op_first->op_type == OP_GV) {
16902 /* @global, %global */
16903 gv = cGVOPx_gv(cUNOPx(obase)->op_first);
16904 if (!gv)
16905 break;
16906 sv = hash ? MUTABLE_SV(GvHV(gv)): MUTABLE_SV(GvAV(gv));
16907 }
16908 else if (obase == PL_op) /* @{expr}, %{expr} */
16909 return find_uninit_var(cUNOPx(obase)->op_first,
16910 uninit_sv, match, desc_p);
16911 else /* @{expr}, %{expr} as a sub-expression */
16912 return NULL;
16913 }
16914
16915 /* attempt to find a match within the aggregate */
16916 if (hash) {
16917 keysv = find_hash_subscript((const HV*)sv, uninit_sv);
16918 if (keysv)
16919 subscript_type = FUV_SUBSCRIPT_HASH;
16920 }
16921 else {
16922 index = find_array_subscript((const AV *)sv, uninit_sv);
16923 if (index >= 0)
16924 subscript_type = FUV_SUBSCRIPT_ARRAY;
16925 }
16926
16927 if (match && subscript_type == FUV_SUBSCRIPT_WITHIN)
16928 break;
16929
16930 return varname(gv, (char)(hash ? '%' : '@'), obase->op_targ,
16931 keysv, index, subscript_type);
16932 }
16933
16934 case OP_RV2SV:
16935 if (cUNOPx(obase)->op_first->op_type == OP_GV) {
16936 /* $global */
16937 gv = cGVOPx_gv(cUNOPx(obase)->op_first);
16938 if (!gv || !GvSTASH(gv))
16939 break;
16940 if (match && (GvSV(gv) != uninit_sv))
16941 break;
16942 return varname(gv, '$', 0, NULL, 0, FUV_SUBSCRIPT_NONE);
16943 }
16944 /* ${expr} */
16945 return find_uninit_var(cUNOPx(obase)->op_first, uninit_sv, 1, desc_p);
16946
16947 case OP_PADSV:
16948 if (match && PAD_SVl(obase->op_targ) != uninit_sv)
16949 break;
16950 return varname(NULL, '$', obase->op_targ,
16951 NULL, 0, FUV_SUBSCRIPT_NONE);
16952
16953 case OP_PADSV_STORE:
16954 if (match && PAD_SVl(obase->op_targ) != uninit_sv)
16955 goto do_op;
16956 return varname(NULL, '$', obase->op_targ,
16957 NULL, 0, FUV_SUBSCRIPT_NONE);
16958
16959 case OP_GVSV:
16960 gv = cGVOPx_gv(obase);
16961 if (!gv || (match && GvSV(gv) != uninit_sv) || !GvSTASH(gv))
16962 break;
16963 return varname(gv, '$', 0, NULL, 0, FUV_SUBSCRIPT_NONE);
16964
16965 case OP_AELEMFAST_LEX:
16966 if (match) {
16967 SV **svp;
16968 AV *av = MUTABLE_AV(PAD_SV(obase->op_targ));
16969 if (!av || SvRMAGICAL(av))
16970 break;
16971 svp = av_fetch(av, (I8)obase->op_private, FALSE);
16972 if (!svp || *svp != uninit_sv)
16973 break;
16974 }
16975 return varname(NULL, '$', obase->op_targ,
16976 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY);
16977
16978 case OP_AELEMFASTLEX_STORE:
16979 if (match) {
16980 SV **svp;
16981 AV *av = MUTABLE_AV(PAD_SV(obase->op_targ));
16982 if (!av || SvRMAGICAL(av))
16983 goto do_op;
16984 svp = av_fetch(av, (I8)obase->op_private, FALSE);
16985 if (!svp || *svp != uninit_sv)
16986 goto do_op;
16987 }
16988 return varname(NULL, '$', obase->op_targ,
16989 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY);
16990
16991 case OP_AELEMFAST:
16992 {
16993 gv = cGVOPx_gv(obase);
16994 if (!gv)
16995 break;
16996 if (match) {
16997 SV **svp;
16998 AV *const av = GvAV(gv);
16999 if (!av || SvRMAGICAL(av))
17000 break;
17001 svp = av_fetch(av, (I8)obase->op_private, FALSE);
17002 if (!svp || *svp != uninit_sv)
17003 break;
17004 }
17005 return varname(gv, '$', 0,
17006 NULL, (I8)obase->op_private, FUV_SUBSCRIPT_ARRAY);
17007 }
17008 NOT_REACHED; /* NOTREACHED */
17009
17010 case OP_EXISTS:
17011 o = cUNOPx(obase)->op_first;
17012 if (!o || o->op_type != OP_NULL ||
17013 ! (o->op_targ == OP_AELEM || o->op_targ == OP_HELEM))
17014 break;
17015 return find_uninit_var(cBINOPo->op_last, uninit_sv, match, desc_p);
17016
17017 case OP_AELEM:
17018 case OP_HELEM:
17019 {
17020 bool negate = FALSE;
17021
17022 if (PL_op == obase)
17023 /* $a[uninit_expr] or $h{uninit_expr} */
17024 return find_uninit_var(cBINOPx(obase)->op_last,
17025 uninit_sv, match, desc_p);
17026
17027 gv = NULL;
17028 o = cBINOPx(obase)->op_first;
17029 kid = cBINOPx(obase)->op_last;
17030
17031 /* get the av or hv, and optionally the gv */
17032 sv = NULL;
17033 if (o->op_type == OP_PADAV || o->op_type == OP_PADHV) {
17034 sv = PAD_SV(o->op_targ);
17035 }
17036 else if ((o->op_type == OP_RV2AV || o->op_type == OP_RV2HV)
17037 && cUNOPo->op_first->op_type == OP_GV)
17038 {
17039 gv = cGVOPx_gv(cUNOPo->op_first);
17040 if (!gv)
17041 break;
17042 sv = o->op_type
17043 == OP_RV2HV ? MUTABLE_SV(GvHV(gv)) : MUTABLE_SV(GvAV(gv));
17044 }
17045 if (!sv)
17046 break;
17047
17048 if (kid && kid->op_type == OP_NEGATE) {
17049 negate = TRUE;
17050 kid = cUNOPx(kid)->op_first;
17051 }
17052
17053 if (kid && kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid))) {
17054 /* index is constant */
17055 SV* kidsv;
17056 if (negate) {
17057 kidsv = newSVpvs_flags("-", SVs_TEMP);
17058 sv_catsv(kidsv, cSVOPx_sv(kid));
17059 }
17060 else
17061 kidsv = cSVOPx_sv(kid);
17062 if (match) {
17063 if (SvMAGICAL(sv))
17064 break;
17065 if (obase->op_type == OP_HELEM) {
17066 HE* he = hv_fetch_ent(MUTABLE_HV(sv), kidsv, 0, 0);
17067 if (!he || HeVAL(he) != uninit_sv)
17068 break;
17069 }
17070 else {
17071 SV * const opsv = cSVOPx_sv(kid);
17072 const IV opsviv = SvIV(opsv);
17073 SV * const * const svp = av_fetch(MUTABLE_AV(sv),
17074 negate ? - opsviv : opsviv,
17075 FALSE);
17076 if (!svp || *svp != uninit_sv)
17077 break;
17078 }
17079 }
17080 if (obase->op_type == OP_HELEM)
17081 return varname(gv, '%', o->op_targ,
17082 kidsv, 0, FUV_SUBSCRIPT_HASH);
17083 else
17084 return varname(gv, '@', o->op_targ, NULL,
17085 negate ? - SvIV(cSVOPx_sv(kid)) : SvIV(cSVOPx_sv(kid)),
17086 FUV_SUBSCRIPT_ARRAY);
17087 }
17088 else {
17089 /* index is an expression;
17090 * attempt to find a match within the aggregate */
17091 if (obase->op_type == OP_HELEM) {
17092 SV * const keysv = find_hash_subscript((const HV*)sv, uninit_sv);
17093 if (keysv)
17094 return varname(gv, '%', o->op_targ,
17095 keysv, 0, FUV_SUBSCRIPT_HASH);
17096 }
17097 else {
17098 const SSize_t index
17099 = find_array_subscript((const AV *)sv, uninit_sv);
17100 if (index >= 0)
17101 return varname(gv, '@', o->op_targ,
17102 NULL, index, FUV_SUBSCRIPT_ARRAY);
17103 }
17104 if (match)
17105 break;
17106 return varname(gv,
17107 (char)((o->op_type == OP_PADAV || o->op_type == OP_RV2AV)
17108 ? '@' : '%'),
17109 o->op_targ, NULL, 0, FUV_SUBSCRIPT_WITHIN);
17110 }
17111 NOT_REACHED; /* NOTREACHED */
17112 }
17113
17114 case OP_MULTIDEREF: {
17115 /* If we were executing OP_MULTIDEREF when the undef warning
17116 * triggered, then it must be one of the index values within
17117 * that triggered it. If not, then the only possibility is that
17118 * the value retrieved by the last aggregate index might be the
17119 * culprit. For the former, we set PL_multideref_pc each time before
17120 * using an index, so work though the item list until we reach
17121 * that point. For the latter, just work through the entire item
17122 * list; the last aggregate retrieved will be the candidate.
17123 * There is a third rare possibility: something triggered
17124 * magic while fetching an array/hash element. Just display
17125 * nothing in this case.
17126 */
17127
17128 /* the named aggregate, if any */
17129 PADOFFSET agg_targ = 0;
17130 GV *agg_gv = NULL;
17131 /* the last-seen index */
17132 UV index_type;
17133 PADOFFSET index_targ;
17134 GV *index_gv;
17135 IV index_const_iv = 0; /* init for spurious compiler warn */
17136 SV *index_const_sv;
17137 int depth = 0; /* how many array/hash lookups we've done */
17138
17139 UNOP_AUX_item *items = cUNOP_AUXx(obase)->op_aux;
17140 UNOP_AUX_item *last = NULL;
17141 UV actions = items->uv;
17142 bool is_hv;
17143
17144 if (PL_op == obase) {
17145 last = PL_multideref_pc;
17146 assert(last >= items && last <= items + items[-1].uv);
17147 }
17148
17149 assert(actions);
17150
17151 while (1) {
17152 is_hv = FALSE;
17153 switch (actions & MDEREF_ACTION_MASK) {
17154
17155 case MDEREF_reload:
17156 actions = (++items)->uv;
17157 continue;
17158
17159 case MDEREF_HV_padhv_helem: /* $lex{...} */
17160 is_hv = TRUE;
17161 /* FALLTHROUGH */
17162 case MDEREF_AV_padav_aelem: /* $lex[...] */
17163 agg_targ = (++items)->pad_offset;
17164 agg_gv = NULL;
17165 break;
17166
17167 case MDEREF_HV_gvhv_helem: /* $pkg{...} */
17168 is_hv = TRUE;
17169 /* FALLTHROUGH */
17170 case MDEREF_AV_gvav_aelem: /* $pkg[...] */
17171 agg_targ = 0;
17172 agg_gv = (GV*)UNOP_AUX_item_sv(++items);
17173 assert(isGV_with_GP(agg_gv));
17174 break;
17175
17176 case MDEREF_HV_gvsv_vivify_rv2hv_helem: /* $pkg->{...} */
17177 case MDEREF_HV_padsv_vivify_rv2hv_helem: /* $lex->{...} */
17178 ++items;
17179 /* FALLTHROUGH */
17180 case MDEREF_HV_pop_rv2hv_helem: /* expr->{...} */
17181 case MDEREF_HV_vivify_rv2hv_helem: /* vivify, ->{...} */
17182 agg_targ = 0;
17183 agg_gv = NULL;
17184 is_hv = TRUE;
17185 break;
17186
17187 case MDEREF_AV_gvsv_vivify_rv2av_aelem: /* $pkg->[...] */
17188 case MDEREF_AV_padsv_vivify_rv2av_aelem: /* $lex->[...] */
17189 ++items;
17190 /* FALLTHROUGH */
17191 case MDEREF_AV_pop_rv2av_aelem: /* expr->[...] */
17192 case MDEREF_AV_vivify_rv2av_aelem: /* vivify, ->[...] */
17193 agg_targ = 0;
17194 agg_gv = NULL;
17195 } /* switch */
17196
17197 index_targ = 0;
17198 index_gv = NULL;
17199 index_const_sv = NULL;
17200
17201 index_type = (actions & MDEREF_INDEX_MASK);
17202 switch (index_type) {
17203 case MDEREF_INDEX_none:
17204 break;
17205 case MDEREF_INDEX_const:
17206 if (is_hv)
17207 index_const_sv = UNOP_AUX_item_sv(++items)
17208 else
17209 index_const_iv = (++items)->iv;
17210 break;
17211 case MDEREF_INDEX_padsv:
17212 index_targ = (++items)->pad_offset;
17213 break;
17214 case MDEREF_INDEX_gvsv:
17215 index_gv = (GV*)UNOP_AUX_item_sv(++items);
17216 assert(isGV_with_GP(index_gv));
17217 break;
17218 }
17219
17220 if (index_type != MDEREF_INDEX_none)
17221 depth++;
17222
17223 if ( index_type == MDEREF_INDEX_none
17224 || (actions & MDEREF_FLAG_last)
17225 || (last && items >= last)
17226 )
17227 break;
17228
17229 actions >>= MDEREF_SHIFT;
17230 } /* while */
17231
17232 if (PL_op == obase) {
17233 /* most likely index was undef */
17234
17235 *desc_p = ( (actions & MDEREF_FLAG_last)
17236 && (obase->op_private
17237 & (OPpMULTIDEREF_EXISTS|OPpMULTIDEREF_DELETE)))
17238 ?
17239 (obase->op_private & OPpMULTIDEREF_EXISTS)
17240 ? "exists"
17241 : "delete"
17242 : is_hv ? "hash element" : "array element";
17243 assert(index_type != MDEREF_INDEX_none);
17244 if (index_gv) {
17245 if (GvSV(index_gv) == uninit_sv)
17246 return varname(index_gv, '$', 0, NULL, 0,
17247 FUV_SUBSCRIPT_NONE);
17248 else
17249 return NULL;
17250 }
17251 if (index_targ) {
17252 if (PL_curpad[index_targ] == uninit_sv)
17253 return varname(NULL, '$', index_targ,
17254 NULL, 0, FUV_SUBSCRIPT_NONE);
17255 else
17256 return NULL;
17257 }
17258 /* If we got to this point it was undef on a const subscript,
17259 * so magic probably involved, e.g. $ISA[0]. Give up. */
17260 return NULL;
17261 }
17262
17263 /* the SV returned by pp_multideref() was undef, if anything was */
17264
17265 if (depth != 1)
17266 break;
17267
17268 if (agg_targ)
17269 sv = PAD_SV(agg_targ);
17270 else if (agg_gv) {
17271 sv = is_hv ? MUTABLE_SV(GvHV(agg_gv)) : MUTABLE_SV(GvAV(agg_gv));
17272 if (!sv)
17273 break;
17274 }
17275 else
17276 break;
17277
17278 if (index_type == MDEREF_INDEX_const) {
17279 if (match) {
17280 if (SvMAGICAL(sv))
17281 break;
17282 if (is_hv) {
17283 HE* he = hv_fetch_ent(MUTABLE_HV(sv), index_const_sv, 0, 0);
17284 if (!he || HeVAL(he) != uninit_sv)
17285 break;
17286 }
17287 else {
17288 SV * const * const svp =
17289 av_fetch(MUTABLE_AV(sv), index_const_iv, FALSE);
17290 if (!svp || *svp != uninit_sv)
17291 break;
17292 }
17293 }
17294 return is_hv
17295 ? varname(agg_gv, '%', agg_targ,
17296 index_const_sv, 0, FUV_SUBSCRIPT_HASH)
17297 : varname(agg_gv, '@', agg_targ,
17298 NULL, index_const_iv, FUV_SUBSCRIPT_ARRAY);
17299 }
17300 else {
17301 /* index is an var */
17302 if (is_hv) {
17303 SV * const keysv = find_hash_subscript((const HV*)sv, uninit_sv);
17304 if (keysv)
17305 return varname(agg_gv, '%', agg_targ,
17306 keysv, 0, FUV_SUBSCRIPT_HASH);
17307 }
17308 else {
17309 const SSize_t index
17310 = find_array_subscript((const AV *)sv, uninit_sv);
17311 if (index >= 0)
17312 return varname(agg_gv, '@', agg_targ,
17313 NULL, index, FUV_SUBSCRIPT_ARRAY);
17314 }
17315 /* look for an element not found */
17316 if (!SvMAGICAL(sv)) {
17317 SV *index_sv = NULL;
17318 if (index_targ) {
17319 index_sv = PL_curpad[index_targ];
17320 }
17321 else if (index_gv) {
17322 index_sv = GvSV(index_gv);
17323 }
17324 if (index_sv && !SvMAGICAL(index_sv) && !SvROK(index_sv)) {
17325 if (is_hv) {
17326 SV *report_index_sv = SvOK(index_sv) ? index_sv : &PL_sv_no;
17327 HE *he = hv_fetch_ent(MUTABLE_HV(sv), report_index_sv, 0, 0);
17328 if (!he) {
17329 return varname(agg_gv, '%', agg_targ,
17330 report_index_sv, 0, FUV_SUBSCRIPT_HASH);
17331 }
17332 }
17333 else {
17334 SSize_t index = SvOK(index_sv) ? SvIV(index_sv) : 0;
17335 SV * const * const svp =
17336 av_fetch(MUTABLE_AV(sv), index, FALSE);
17337 if (!svp) {
17338 return varname(agg_gv, '@', agg_targ,
17339 NULL, index, FUV_SUBSCRIPT_ARRAY);
17340 }
17341 }
17342 }
17343 }
17344 if (match)
17345 break;
17346 return varname(agg_gv,
17347 is_hv ? '%' : '@',
17348 agg_targ, NULL, 0, FUV_SUBSCRIPT_WITHIN);
17349 }
17350 NOT_REACHED; /* NOTREACHED */
17351 }
17352
17353 case OP_AASSIGN:
17354 /* only examine RHS */
17355 return find_uninit_var(cBINOPx(obase)->op_first, uninit_sv,
17356 match, desc_p);
17357
17358 case OP_OPEN:
17359 o = cUNOPx(obase)->op_first;
17360 if ( o->op_type == OP_PUSHMARK
17361 || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK)
17362 )
17363 o = OpSIBLING(o);
17364
17365 if (!OpHAS_SIBLING(o)) {
17366 /* one-arg version of open is highly magical */
17367
17368 if (o->op_type == OP_GV) { /* open FOO; */
17369 gv = cGVOPx_gv(o);
17370 if (match && GvSV(gv) != uninit_sv)
17371 break;
17372 return varname(gv, '$', 0,
17373 NULL, 0, FUV_SUBSCRIPT_NONE);
17374 }
17375 /* other possibilities not handled are:
17376 * open $x; or open my $x; should return '${*$x}'
17377 * open expr; should return '$'.expr ideally
17378 */
17379 break;
17380 }
17381 match = 1;
17382 goto do_op;
17383
17384 /* ops where $_ may be an implicit arg */
17385 case OP_TRANS:
17386 case OP_TRANSR:
17387 case OP_SUBST:
17388 case OP_MATCH:
17389 if ( !(obase->op_flags & OPf_STACKED)) {
17390 if (uninit_sv == DEFSV)
17391 return newSVpvs_flags("$_", SVs_TEMP);
17392 else if (obase->op_targ
17393 && uninit_sv == PAD_SVl(obase->op_targ))
17394 return varname(NULL, '$', obase->op_targ, NULL, 0,
17395 FUV_SUBSCRIPT_NONE);
17396 }
17397 goto do_op;
17398
17399 case OP_PRTF:
17400 case OP_PRINT:
17401 case OP_SAY:
17402 match = 1; /* print etc can return undef on defined args */
17403 /* skip filehandle as it can't produce 'undef' warning */
17404 o = cUNOPx(obase)->op_first;
17405 if ((obase->op_flags & OPf_STACKED)
17406 &&
17407 ( o->op_type == OP_PUSHMARK
17408 || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK)))
17409 o = OpSIBLING(OpSIBLING(o));
17410 goto do_op2;
17411
17412
17413 case OP_ENTEREVAL: /* could be eval $undef or $x='$undef'; eval $x */
17414 case OP_CUSTOM: /* XS or custom code could trigger random warnings */
17415
17416 /* the following ops are capable of returning PL_sv_undef even for
17417 * defined arg(s) */
17418
17419 case OP_BACKTICK:
17420 case OP_PIPE_OP:
17421 case OP_FILENO:
17422 case OP_BINMODE:
17423 case OP_TIED:
17424 case OP_GETC:
17425 case OP_SYSREAD:
17426 case OP_READLINE:
17427 case OP_SEND:
17428 case OP_IOCTL:
17429 case OP_SOCKET:
17430 case OP_SOCKPAIR:
17431 case OP_BIND:
17432 case OP_CONNECT:
17433 case OP_LISTEN:
17434 case OP_ACCEPT:
17435 case OP_SHUTDOWN:
17436 case OP_SSOCKOPT:
17437 case OP_GETPEERNAME:
17438 case OP_FTRREAD:
17439 case OP_FTRWRITE:
17440 case OP_FTREXEC:
17441 case OP_FTROWNED:
17442 case OP_FTEREAD:
17443 case OP_FTEWRITE:
17444 case OP_FTEEXEC:
17445 case OP_FTEOWNED:
17446 case OP_FTIS:
17447 case OP_FTZERO:
17448 case OP_FTSIZE:
17449 case OP_FTFILE:
17450 case OP_FTDIR:
17451 case OP_FTLINK:
17452 case OP_FTPIPE:
17453 case OP_FTSOCK:
17454 case OP_FTBLK:
17455 case OP_FTCHR:
17456 case OP_FTTTY:
17457 case OP_FTSUID:
17458 case OP_FTSGID:
17459 case OP_FTSVTX:
17460 case OP_FTTEXT:
17461 case OP_FTBINARY:
17462 case OP_FTMTIME:
17463 case OP_FTATIME:
17464 case OP_FTCTIME:
17465 case OP_READLINK:
17466 case OP_OPEN_DIR:
17467 case OP_READDIR:
17468 case OP_TELLDIR:
17469 case OP_SEEKDIR:
17470 case OP_REWINDDIR:
17471 case OP_CLOSEDIR:
17472 case OP_GMTIME:
17473 case OP_ALARM:
17474 case OP_SEMGET:
17475 case OP_GETLOGIN:
17476 case OP_SUBSTR:
17477 case OP_AEACH:
17478 case OP_EACH:
17479 case OP_SORT:
17480 case OP_CALLER:
17481 case OP_DOFILE:
17482 case OP_PROTOTYPE:
17483 case OP_NCMP:
17484 case OP_SMARTMATCH:
17485 case OP_UNPACK:
17486 case OP_SYSOPEN:
17487 case OP_SYSSEEK:
17488 case OP_SPLICE: /* scalar splice(@x, $i, 0) ==> undef */
17489 match = 1;
17490 goto do_op;
17491
17492 case OP_ENTERSUB:
17493 case OP_GOTO:
17494 /* XXX tmp hack: these two may call an XS sub, and currently
17495 XS subs don't have a SUB entry on the context stack, so CV and
17496 pad determination goes wrong, and BAD things happen. So, just
17497 don't try to determine the value under those circumstances.
17498 Need a better fix at dome point. DAPM 11/2007 */
17499 break;
17500
17501 case OP_FLIP:
17502 case OP_FLOP:
17503 {
17504 GV * const gv = gv_fetchpvs(".", GV_NOTQUAL, SVt_PV);
17505 if (gv && GvSV(gv) == uninit_sv)
17506 return newSVpvs_flags("$.", SVs_TEMP);
17507 goto do_op;
17508 }
17509
17510 case OP_LENGTH:
17511 o = cUNOPx(obase)->op_first;
17512 sv = find_uninit_var(o, uninit_sv, match, desc_p);
17513 if (sv) {
17514 Perl_sv_insert_flags(aTHX_ sv, 0, 0, STR_WITH_LEN("length("), 0);
17515 sv_catpvs_nomg(sv, ")");
17516 }
17517 return sv;
17518
17519 case OP_SHIFT:
17520 case OP_POP:
17521 if (match) {
17522 break;
17523 }
17524 if (!(obase->op_flags & OPf_KIDS)) {
17525 sv = newSVpvn_flags("", 0, SVs_TEMP);
17526 }
17527 else {
17528 o = cUNOPx(obase)->op_first;
17529 if (o->op_type == OP_RV2AV) {
17530 o2 = cUNOPx(o)->op_first;
17531 if (o2->op_type != OP_GV) {
17532 break;
17533 }
17534 gv = cGVOPx_gv(o2);
17535 if (!gv) {
17536 break;
17537 }
17538 }
17539 else if (o->op_type == OP_PADAV) {
17540 gv = NULL;
17541 }
17542 else {
17543 break;
17544 }
17545 sv = varname(gv, '@', o->op_targ, NULL, 0, FUV_SUBSCRIPT_NONE);
17546 }
17547 if (sv) {
17548 const char *name = OP_NAME(obase);
17549 Perl_sv_insert_flags(aTHX_ sv, 0, 0, STR_WITH_LEN("("), 0);
17550 Perl_sv_insert_flags(aTHX_ sv, 0, 0, name, strlen(name), 0);
17551 sv_catpvs_nomg(sv, ")");
17552 }
17553 return sv;
17554
17555 case OP_POS:
17556 /* def-ness of rval pos() is independent of the def-ness of its arg */
17557 if ( !(obase->op_flags & OPf_MOD))
17558 break;
17559 /* FALLTHROUGH */
17560
17561 case OP_SCHOMP:
17562 case OP_CHOMP:
17563 if (SvROK(PL_rs) && uninit_sv == SvRV(PL_rs))
17564 return newSVpvs_flags("${$/}", SVs_TEMP);
17565 /* FALLTHROUGH */
17566
17567 default:
17568 do_op:
17569 if (!(obase->op_flags & OPf_KIDS))
17570 break;
17571 o = cUNOPx(obase)->op_first;
17572
17573 do_op2:
17574 if (!o)
17575 break;
17576
17577 /* This loop checks all the kid ops, skipping any that cannot pos-
17578 * sibly be responsible for the uninitialized value; i.e., defined
17579 * constants and ops that return nothing. If there is only one op
17580 * left that is not skipped, then we *know* it is responsible for
17581 * the uninitialized value. If there is more than one op left, we
17582 * have to look for an exact match in the while() loop below.
17583 * Note that we skip padrange, because the individual pad ops that
17584 * it replaced are still in the tree, so we work on them instead.
17585 */
17586 o2 = NULL;
17587 for (kid=o; kid; kid = OpSIBLING(kid)) {
17588 const OPCODE type = kid->op_type;
17589 if ( (type == OP_CONST && SvOK(cSVOPx_sv(kid)))
17590 || (type == OP_NULL && ! (kid->op_flags & OPf_KIDS))
17591 || (type == OP_PUSHMARK)
17592 || (type == OP_PADRANGE)
17593 )
17594 continue;
17595
17596 if (o2) { /* more than one found */
17597 o2 = NULL;
17598 break;
17599 }
17600 o2 = kid;
17601 }
17602 if (o2)
17603 return find_uninit_var(o2, uninit_sv, match, desc_p);
17604
17605 /* scan all args */
17606 while (o) {
17607 sv = find_uninit_var(o, uninit_sv, 1, desc_p);
17608 if (sv)
17609 return sv;
17610 o = OpSIBLING(o);
17611 }
17612 break;
17613 }
17614 return NULL;
17615 }
17616
17617
17618 /*
17619 =for apidoc_section $warning
17620 =for apidoc report_uninit
17621
17622 Print appropriate "Use of uninitialized variable" warning.
17623
17624 =cut
17625 */
17626
17627 void
17628 Perl_report_uninit(pTHX_ const SV *uninit_sv)
17629 {
17630 const char *desc = NULL;
17631 SV* varname = NULL;
17632
17633 if (PL_op) {
17634 desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded
17635 ? "join or string"
17636 : PL_op->op_type == OP_MULTICONCAT
17637 && (PL_op->op_private & OPpMULTICONCAT_FAKE)
17638 ? "sprintf"
17639 : OP_DESC(PL_op);
17640 if (uninit_sv && PL_curpad) {
17641 varname = find_uninit_var(PL_op, uninit_sv, 0, &desc);
17642 if (varname)
17643 sv_insert(varname, 0, 0, " ", 1);
17644 }
17645 }
17646 else if (PL_curstackinfo->si_type == PERLSI_SORT && cxstack_ix == 0)
17647 /* we've reached the end of a sort block or sub,
17648 * and the uninit value is probably what that code returned */
17649 desc = "sort";
17650
17651 /* PL_warn_uninit_sv is constant */
17652 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
17653 if (desc)
17654 /* diag_listed_as: Use of uninitialized value%s */
17655 Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit_sv,
17656 SVfARG(varname ? varname : &PL_sv_no),
17657 " in ", desc);
17658 else
17659 Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit,
17660 "", "", "");
17661 GCC_DIAG_RESTORE_STMT;
17662 }
17663
17664 /*
17665 * ex: set ts=8 sts=4 sw=4 et:
17666 */
17667