1 /* hv.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11 /*
12 * I sit beside the fire and think
13 * of all that I have seen.
14 * --Bilbo
15 *
16 * [p.278 of _The Lord of the Rings_, II/iii: "The Ring Goes South"]
17 */
18
19 /*
20 =head1 HV Handling
21 A HV structure represents a Perl hash. It consists mainly of an array
22 of pointers, each of which points to a linked list of HE structures. The
23 array is indexed by the hash function of the key, so each linked list
24 represents all the hash entries with the same hash value. Each HE contains
25 a pointer to the actual value, plus a pointer to a HEK structure which
26 holds the key and hash value.
27
28 =cut
29
30 */
31
32 #include "EXTERN.h"
33 #define PERL_IN_HV_C
34 #define PERL_HASH_INTERNAL_ACCESS
35 #include "perl.h"
36
37 /* we split when we collide and we have a load factor over 0.667.
38 * NOTE if you change this formula so we split earlier than previously
39 * you MUST change the logic in hv_ksplit()
40 */
41
42 /* MAX_BUCKET_MAX is the maximum max bucket index, at which point we stop growing the
43 * number of buckets,
44 */
45 #define MAX_BUCKET_MAX ((1<<26)-1)
46 #define DO_HSPLIT(xhv) ( ( ((xhv)->xhv_keys + ((xhv)->xhv_keys >> 1)) > (xhv)->xhv_max ) && \
47 ((xhv)->xhv_max < MAX_BUCKET_MAX) )
48
49 static const char S_strtab_error[]
50 = "Cannot modify shared string table in hv_%s";
51
52 #define DEBUG_HASH_RAND_BITS (DEBUG_h_TEST)
53
54 /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
55 * See also https://en.wikipedia.org/wiki/Xorshift
56 */
57 #if IVSIZE == 8
58 /* 64 bit version */
59 #define XORSHIFT_RAND_BITS(x) PERL_XORSHIFT64_A(x)
60 #else
61 /* 32 bit version */
62 #define XORSHIFT_RAND_BITS(x) PERL_XORSHIFT32_A(x)
63 #endif
64
65 #define UPDATE_HASH_RAND_BITS_KEY(key,klen) \
66 STMT_START { \
67 XORSHIFT_RAND_BITS(PL_hash_rand_bits); \
68 if (DEBUG_HASH_RAND_BITS) { \
69 PerlIO_printf( Perl_debug_log, \
70 "PL_hash_rand_bits=%016" UVxf" @ %s:%-4d", \
71 (UV)PL_hash_rand_bits, __FILE__, __LINE__ \
72 ); \
73 if (DEBUG_v_TEST && key) { \
74 PerlIO_printf( Perl_debug_log, " key:'%.*s' %" UVuf"\n", \
75 (int)klen, \
76 key ? key : "", /* silence warning */ \
77 (UV)klen \
78 ); \
79 } else { \
80 PerlIO_printf( Perl_debug_log, "\n"); \
81 } \
82 } \
83 } STMT_END
84
85 #define MAYBE_UPDATE_HASH_RAND_BITS_KEY(key,klen) \
86 STMT_START { \
87 if (PL_HASH_RAND_BITS_ENABLED) \
88 UPDATE_HASH_RAND_BITS_KEY(key,klen); \
89 } STMT_END
90
91
92 #define UPDATE_HASH_RAND_BITS() \
93 UPDATE_HASH_RAND_BITS_KEY(NULL,0)
94
95 #define MAYBE_UPDATE_HASH_RAND_BITS() \
96 MAYBE_UPDATE_HASH_RAND_BITS_KEY(NULL,0)
97
98 /* HeKFLAGS(entry) is a single U8, so only provides 8 flags bits.
99 We currently use 3. All 3 we have behave differently, so if we find a use for
100 more flags it's hard to predict which they group with.
101
102 Hash keys are stored as flat octet sequences, not SVs. Hence we need a flag
103 bit to say whether those octet sequences represent ISO-8859-1 or UTF-8 -
104 HVhek_UTF8. The value of this flag bit matters for (regular) hash key
105 lookups.
106
107 To speed up comparisons, keys are normalised to octets. But we (also)
108 preserve whether the key was supplied, so we need another flag bit to say
109 whether to reverse the normalisation when iterating the keys (converting them
110 back to SVs) - HVhek_WASUTF8. The value of this flag bit must be ignored for
111 (regular) hash key lookups.
112
113 But for the shared string table (the private "hash" that manages shared hash
114 keys and their reference counts), we need to be able to store both variants
115 (HVhek_WASUTF8 set and clear), so the code performing lookups in this hash
116 must be different and consider both keys.
117
118 However, regular hashes (now) can have a mix of shared and unshared keys.
119 (This avoids the need to reallocate all the keys into unshared storage at
120 the point where hash passes the "large" hash threshold, and no longer uses
121 the shared string table - existing keys remain shared, to avoid makework.)
122
123 Meaning that HVhek_NOTSHARED *may* be set in regular hashes (but should be
124 ignored for hash lookups) but must always be clear in the keys in the shared
125 string table (because the pointers to these keys are directly copied into
126 regular hashes - this is how shared keys work.)
127
128 Hence all 3 are different, and it's hard to predict the best way to future
129 proof what is needed next.
130
131 We also have HVhek_ENABLEHVKFLAGS, which is used as a mask within the code
132 below to determine whether to set HvHASKFLAGS() true on the hash as a whole.
133 This is a public "optimisation" flag provided to serealisers, to indicate
134 (up front) that a hash contains non-8-bit keys, if they want to use different
135 storage formats for hashes where all keys are simple octet sequences
136 (avoiding needing to store an extra byte per hash key), and they need to know
137 that this holds *before* iterating the hash keys. Only Storable seems to use
138 this. (For this use case, HVhek_NOTSHARED doesn't matter)
139
140 For now, we assume that any future flag bits will need to be distinguished
141 in the shared string table, hence we create this mask for the shared string
142 table code. It happens to be the same as HVhek_ENABLEHVKFLAGS, but that might
143 change if we add a flag bit that matters to the shared string table but not
144 to Storable (or similar). */
145
146 #define HVhek_STORAGE_MASK (0xFF & ~HVhek_NOTSHARED)
147
148 #ifdef PURIFY
149
150 #define new_HE() (HE*)safemalloc(sizeof(HE))
151 #define del_HE(p) safefree((char*)p)
152
153 #else
154
155 STATIC HE*
S_new_he(pTHX)156 S_new_he(pTHX)
157 {
158 HE* he;
159 void ** const root = &PL_body_roots[HE_ARENA_ROOT_IX];
160
161 if (!*root)
162 Perl_more_bodies(aTHX_ HE_ARENA_ROOT_IX, sizeof(HE), PERL_ARENA_SIZE);
163 he = (HE*) *root;
164 assert(he);
165 *root = HeNEXT(he);
166 return he;
167 }
168
169 #define new_HE() new_he()
170 #define del_HE(p) \
171 STMT_START { \
172 HeNEXT(p) = (HE*)(PL_body_roots[HE_ARENA_ROOT_IX]); \
173 PL_body_roots[HE_ARENA_ROOT_IX] = p; \
174 } STMT_END
175
176
177
178 #endif
179
180 STATIC HEK *
S_save_hek_flags(const char * str,I32 len,U32 hash,int flags)181 S_save_hek_flags(const char *str, I32 len, U32 hash, int flags)
182 {
183 char *k;
184 HEK *hek;
185
186 PERL_ARGS_ASSERT_SAVE_HEK_FLAGS;
187
188 Newx(k, HEK_BASESIZE + len + 2, char);
189 hek = (HEK*)k;
190 Copy(str, HEK_KEY(hek), len, char);
191 HEK_KEY(hek)[len] = 0;
192 HEK_LEN(hek) = len;
193 HEK_HASH(hek) = hash;
194 HEK_FLAGS(hek) = HVhek_NOTSHARED | (flags & HVhek_STORAGE_MASK);
195
196 if (flags & HVhek_FREEKEY)
197 Safefree(str);
198 return hek;
199 }
200
201 /* free the pool of temporary HE/HEK pairs returned by hv_fetch_ent
202 * for tied hashes */
203
204 void
Perl_free_tied_hv_pool(pTHX)205 Perl_free_tied_hv_pool(pTHX)
206 {
207 HE *he = PL_hv_fetch_ent_mh;
208 while (he) {
209 HE * const ohe = he;
210 Safefree(HeKEY_hek(he));
211 he = HeNEXT(he);
212 del_HE(ohe);
213 }
214 PL_hv_fetch_ent_mh = NULL;
215 }
216
217 #if defined(USE_ITHREADS)
218 HEK *
Perl_hek_dup(pTHX_ HEK * source,CLONE_PARAMS * param)219 Perl_hek_dup(pTHX_ HEK *source, CLONE_PARAMS* param)
220 {
221 HEK *shared;
222
223 PERL_ARGS_ASSERT_HEK_DUP;
224 PERL_UNUSED_ARG(param);
225
226 if (!source)
227 return NULL;
228
229 shared = (HEK*)ptr_table_fetch(PL_ptr_table, source);
230 if (shared) {
231 /* We already shared this hash key. */
232 (void)share_hek_hek(shared);
233 }
234 else {
235 shared
236 = share_hek_flags(HEK_KEY(source), HEK_LEN(source),
237 HEK_HASH(source), HEK_FLAGS(source));
238 ptr_table_store(PL_ptr_table, source, shared);
239 }
240 return shared;
241 }
242
243 HE *
Perl_he_dup(pTHX_ const HE * e,bool shared,CLONE_PARAMS * param)244 Perl_he_dup(pTHX_ const HE *e, bool shared, CLONE_PARAMS* param)
245 {
246 HE *ret;
247
248 PERL_ARGS_ASSERT_HE_DUP;
249
250 /* All the *_dup functions are deemed to be API, despite most being deeply
251 tied to the internals. Hence we can't simply remove the parameter
252 "shared" from this function. */
253 /* sv_dup and sv_dup_inc seem to be the only two that are used by XS code.
254 Probably the others should be dropped from the API. See #19409 */
255 PERL_UNUSED_ARG(shared);
256
257 if (!e)
258 return NULL;
259 /* look for it in the table first */
260 ret = (HE*)ptr_table_fetch(PL_ptr_table, e);
261 if (ret)
262 return ret;
263
264 /* create anew and remember what it is */
265 ret = new_HE();
266 ptr_table_store(PL_ptr_table, e, ret);
267
268 if (HeKLEN(e) == HEf_SVKEY) {
269 char *k;
270 Newx(k, HEK_BASESIZE + sizeof(const SV *), char);
271 HeKEY_hek(ret) = (HEK*)k;
272 HeKEY_sv(ret) = sv_dup_inc(HeKEY_sv(e), param);
273 }
274 else if (!(HeKFLAGS(e) & HVhek_NOTSHARED)) {
275 /* This is hek_dup inlined, which seems to be important for speed
276 reasons. */
277 HEK * const source = HeKEY_hek(e);
278 HEK *shared = (HEK*)ptr_table_fetch(PL_ptr_table, source);
279
280 if (shared) {
281 /* We already shared this hash key. */
282 (void)share_hek_hek(shared);
283 }
284 else {
285 shared
286 = share_hek_flags(HEK_KEY(source), HEK_LEN(source),
287 HEK_HASH(source), HEK_FLAGS(source));
288 ptr_table_store(PL_ptr_table, source, shared);
289 }
290 HeKEY_hek(ret) = shared;
291 }
292 else
293 HeKEY_hek(ret) = save_hek_flags(HeKEY(e), HeKLEN(e), HeHASH(e),
294 HeKFLAGS(e));
295 HeVAL(ret) = sv_dup_inc(HeVAL(e), param);
296
297 HeNEXT(ret) = he_dup(HeNEXT(e), FALSE, param);
298 return ret;
299 }
300 #endif /* USE_ITHREADS */
301
302 static void
S_hv_notallowed(pTHX_ int flags,const char * key,I32 klen,const char * msg)303 S_hv_notallowed(pTHX_ int flags, const char *key, I32 klen,
304 const char *msg)
305 {
306 /* Straight to SVt_PVN here, as needed by sv_setpvn_fresh and
307 * sv_usepvn would otherwise call it */
308 SV * const sv = newSV_type_mortal(SVt_PV);
309
310 PERL_ARGS_ASSERT_HV_NOTALLOWED;
311
312 if (!(flags & HVhek_FREEKEY)) {
313 sv_setpvn_fresh(sv, key, klen);
314 }
315 else {
316 /* Need to free saved eventually assign to mortal SV */
317 /* XXX is this line an error ???: SV *sv = sv_newmortal(); */
318 sv_usepvn(sv, (char *) key, klen);
319 }
320 if (flags & HVhek_UTF8) {
321 SvUTF8_on(sv);
322 }
323 Perl_croak(aTHX_ msg, SVfARG(sv));
324 }
325
326 /* (klen == HEf_SVKEY) is special for MAGICAL hv entries, meaning key slot
327 * contains an SV* */
328
329 /*
330 =for apidoc hv_store
331 =for apidoc_item hv_stores
332
333 These each store SV C<val> with the specified key in hash C<hv>, returning NULL
334 if the operation failed or if the value did not need to be actually stored
335 within the hash (as in the case of tied hashes). Otherwise it can be
336 dereferenced to get the original C<SV*>.
337
338 They differ only in how the hash key is specified.
339
340 In C<hv_stores>, the key must be a C language string literal, enclosed in
341 double quotes. It is never treated as being in UTF-8. There is no
342 length_parameter.
343
344 In C<hv_store>, C<key> is either NULL or points to the first byte of the string
345 specifying the key, and its length in bytes is given by the absolute value of
346 an additional parameter, C<klen>. A NULL key indicates the key is to be
347 treated as C<undef>, and C<klen> is ignored; otherwise the key string may
348 contain embedded-NUL bytes. If C<klen> is negative, the string is treated as
349 being encoded in UTF-8; otherwise not.
350
351 C<hv_store> has another extra parameter, C<hash>, a precomputed hash of the key
352 string, or zero if it has not been precomputed. This parameter is omitted from
353 C<hv_stores>, as it is computed automatically at compile time.
354
355 If <hv> is NULL, NULL is returned and no action is taken.
356
357 If C<val> is NULL, it is treated as being C<undef>; otherwise the caller is
358 responsible for suitably incrementing the reference count of C<val> before
359 the call, and decrementing it if the function returned C<NULL>. Effectively
360 a successful C<hv_store> takes ownership of one reference to C<val>. This is
361 usually what you want; a newly created SV has a reference count of one, so
362 if all your code does is create SVs then store them in a hash, C<hv_store>
363 will own the only reference to the new SV, and your code doesn't need to do
364 anything further to tidy up.
365
366 C<hv_store> is not implemented as a call to L</C<hv_store_ent>>, and does not
367 create a temporary SV for the key, so if your key data is not already in SV
368 form then use C<hv_store> in preference to C<hv_store_ent>.
369
370 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
371 information on how to use this function on tied hashes.
372
373 =for apidoc hv_store_ent
374
375 Stores C<val> in a hash. The hash key is specified as C<key>. The C<hash>
376 parameter is the precomputed hash value; if it is zero then Perl will
377 compute it. The return value is the new hash entry so created. It will be
378 C<NULL> if the operation failed or if the value did not need to be actually
379 stored within the hash (as in the case of tied hashes). Otherwise the
380 contents of the return value can be accessed using the C<He?> macros
381 described here. Note that the caller is responsible for suitably
382 incrementing the reference count of C<val> before the call, and
383 decrementing it if the function returned NULL. Effectively a successful
384 C<hv_store_ent> takes ownership of one reference to C<val>. This is
385 usually what you want; a newly created SV has a reference count of one, so
386 if all your code does is create SVs then store them in a hash, C<hv_store>
387 will own the only reference to the new SV, and your code doesn't need to do
388 anything further to tidy up. Note that C<hv_store_ent> only reads the C<key>;
389 unlike C<val> it does not take ownership of it, so maintaining the correct
390 reference count on C<key> is entirely the caller's responsibility. The reason
391 it does not take ownership, is that C<key> is not used after this function
392 returns, and so can be freed immediately. C<hv_store>
393 is not implemented as a call to C<hv_store_ent>, and does not create a temporary
394 SV for the key, so if your key data is not already in SV form then use
395 C<hv_store> in preference to C<hv_store_ent>.
396
397 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
398 information on how to use this function on tied hashes.
399
400 =for apidoc hv_exists
401 =for apidoc_item ||hv_existss|HV *hv|"key"
402
403 These return a boolean indicating whether the specified hash key exists.
404
405 In C<hv_existss>, the key must be a C language string literal, enclosed in
406 double quotes. It is never treated as being in UTF-8. There is no
407 length_parameter.
408
409 In C<hv_exists>, the absolute value of C<klen> is the length of the key. If
410 C<klen> is negative the key is assumed to be in UTF-8-encoded Unicode.
411
412 =for apidoc hv_fetch
413 =for apidoc_item ||hv_fetchs|HV *hv|"key"|I32 lval
414
415 These return the SV which corresponds to the specified key in the hash.
416
417 In C<hv_fetchs>, the key must be a C language string literal, enclosed in
418 double quotes. It is never treated as being in UTF-8. There is no
419 length_parameter.
420
421 In C<hv_fetch>, the absolute value of C<klen> is the length of the key. If
422 C<klen> is negative the key is assumed to be in UTF-8-encoded Unicode.
423
424 In both, if C<lval> is set, then the fetch will be part of a store. This means
425 that if there is no value in the hash associated with the given key, then one
426 is created and a pointer to it is returned. The C<SV*> it points to can be
427 assigned to. But always check that the return value is non-null before
428 dereferencing it to an C<SV*>.
429
430 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
431 information on how to use this function on tied hashes.
432
433 =for apidoc hv_exists_ent
434
435 Returns a boolean indicating whether
436 the specified hash key exists. C<hash>
437 can be a valid precomputed hash value, or 0 to ask for it to be
438 computed.
439
440 =cut
441 */
442
443 /* returns an HE * structure with the all fields set */
444 /* note that hent_val will be a mortal sv for MAGICAL hashes */
445 /*
446 =for apidoc hv_fetch_ent
447
448 Returns the hash entry which corresponds to the specified key in the hash.
449 C<hash> must be a valid precomputed hash number for the given C<key>, or 0
450 if you want the function to compute it. IF C<lval> is set then the fetch
451 will be part of a store. Make sure the return value is non-null before
452 accessing it. The return value when C<hv> is a tied hash is a pointer to a
453 static location, so be sure to make a copy of the structure if you need to
454 store it somewhere.
455
456 See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
457 information on how to use this function on tied hashes.
458
459 =cut
460 */
461
462 /* Common code for hv_delete()/hv_exists()/hv_fetch()/hv_store() */
463 void *
Perl_hv_common_key_len(pTHX_ HV * hv,const char * key,I32 klen_i32,const int action,SV * val,const U32 hash)464 Perl_hv_common_key_len(pTHX_ HV *hv, const char *key, I32 klen_i32,
465 const int action, SV *val, const U32 hash)
466 {
467 STRLEN klen;
468 int flags;
469
470 PERL_ARGS_ASSERT_HV_COMMON_KEY_LEN;
471
472 if (klen_i32 < 0) {
473 klen = -klen_i32;
474 flags = HVhek_UTF8;
475 } else {
476 klen = klen_i32;
477 flags = 0;
478 }
479 return hv_common(hv, NULL, key, klen, flags, action, val, hash);
480 }
481
482 void *
Perl_hv_common(pTHX_ HV * hv,SV * keysv,const char * key,STRLEN klen,int flags,int action,SV * val,U32 hash)483 Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
484 int flags, int action, SV *val, U32 hash)
485 {
486 XPVHV* xhv;
487 HE *entry;
488 HE **oentry;
489 SV *sv;
490 bool is_utf8;
491 bool in_collision;
492 const int return_svp = action & HV_FETCH_JUST_SV;
493 HEK *keysv_hek = NULL;
494
495 if (!hv)
496 return NULL;
497 if (SvIS_FREED(hv))
498 return NULL;
499
500 assert(SvTYPE(hv) == SVt_PVHV);
501
502 if (SvSMAGICAL(hv) && SvGMAGICAL(hv) && !(action & HV_DISABLE_UVAR_XKEY)) {
503 MAGIC* mg;
504 if ((mg = mg_find((const SV *)hv, PERL_MAGIC_uvar))) {
505 struct ufuncs * const uf = (struct ufuncs *)mg->mg_ptr;
506 if (uf->uf_set == NULL) {
507 SV* obj = mg->mg_obj;
508
509 if (!keysv) {
510 keysv = newSVpvn_flags(key, klen, SVs_TEMP |
511 ((flags & HVhek_UTF8)
512 ? SVf_UTF8 : 0));
513 }
514
515 mg->mg_obj = keysv; /* pass key */
516 uf->uf_index = action; /* pass action */
517 magic_getuvar(MUTABLE_SV(hv), mg);
518 keysv = mg->mg_obj; /* may have changed */
519 mg->mg_obj = obj;
520
521 /* If the key may have changed, then we need to invalidate
522 any passed-in computed hash value. */
523 hash = 0;
524 }
525 }
526 }
527
528 /* flags might have HVhek_NOTSHARED set. If so, we need to ignore that.
529 Some callers to hv_common() pass the flags value from an existing HEK,
530 and if that HEK is not shared, then it has the relevant flag bit set,
531 which must not be passed into share_hek_flags().
532
533 It would be "purer" to insist that all callers clear it, but we'll end up
534 with subtle bugs if we leave it to them, or runtime assertion failures if
535 we try to enforce our documentation with landmines.
536
537 If keysv is true, all code paths assign a new value to flags with that
538 bit clear, so we're always "good". Hence we only need to explicitly clear
539 this bit in the else block. */
540 if (keysv) {
541 if (flags & HVhek_FREEKEY)
542 Safefree(key);
543 key = SvPV_const(keysv, klen);
544 is_utf8 = (SvUTF8(keysv) != 0);
545 if (SvIsCOW_shared_hash(keysv)) {
546 flags = HVhek_KEYCANONICAL | (is_utf8 ? HVhek_UTF8 : 0);
547 } else {
548 flags = 0;
549 }
550 } else {
551 is_utf8 = cBOOL(flags & HVhek_UTF8);
552 flags &= ~HVhek_NOTSHARED;
553 }
554
555 if (action & HV_DELETE) {
556 return (void *) hv_delete_common(hv, keysv, key, klen,
557 flags | (is_utf8 ? HVhek_UTF8 : 0),
558 action, hash);
559 }
560
561 xhv = (XPVHV*)SvANY(hv);
562 if (SvMAGICAL(hv)) {
563 if (SvRMAGICAL(hv) && !(action & (HV_FETCH_ISSTORE|HV_FETCH_ISEXISTS))) {
564 if (mg_find((const SV *)hv, PERL_MAGIC_tied)
565 || SvGMAGICAL((const SV *)hv))
566 {
567 /* FIXME should be able to skimp on the HE/HEK here when
568 HV_FETCH_JUST_SV is true. */
569 if (!keysv) {
570 keysv = newSVpvn_utf8(key, klen, is_utf8);
571 } else {
572 keysv = newSVsv(keysv);
573 }
574 sv = sv_newmortal();
575 mg_copy(MUTABLE_SV(hv), sv, (char *)keysv, HEf_SVKEY);
576
577 /* grab a fake HE/HEK pair from the pool or make a new one */
578 entry = PL_hv_fetch_ent_mh;
579 if (entry)
580 PL_hv_fetch_ent_mh = HeNEXT(entry);
581 else {
582 char *k;
583 entry = new_HE();
584 Newx(k, HEK_BASESIZE + sizeof(const SV *), char);
585 HeKEY_hek(entry) = (HEK*)k;
586 }
587 HeNEXT(entry) = NULL;
588 HeSVKEY_set(entry, keysv);
589 HeVAL(entry) = sv;
590 sv_upgrade(sv, SVt_PVLV);
591 LvTYPE(sv) = 'T';
592 /* so we can free entry when freeing sv */
593 LvTARG(sv) = MUTABLE_SV(entry);
594
595 /* XXX remove at some point? */
596 if (flags & HVhek_FREEKEY)
597 Safefree(key);
598
599 if (return_svp) {
600 return entry ? (void *) &HeVAL(entry) : NULL;
601 }
602 return (void *) entry;
603 }
604 #ifdef ENV_IS_CASELESS
605 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
606 U32 i;
607 for (i = 0; i < klen; ++i)
608 if (isLOWER(key[i])) {
609 /* Would be nice if we had a routine to do the
610 copy and uppercase in a single pass through. */
611 const char * const nkey = strupr(savepvn(key,klen));
612 /* Note that this fetch is for nkey (the uppercased
613 key) whereas the store is for key (the original) */
614 void *result = hv_common(hv, NULL, nkey, klen,
615 HVhek_FREEKEY, /* free nkey */
616 0 /* non-LVAL fetch */
617 | HV_DISABLE_UVAR_XKEY
618 | return_svp,
619 NULL /* no value */,
620 0 /* compute hash */);
621 if (!result && (action & HV_FETCH_LVALUE)) {
622 /* This call will free key if necessary.
623 Do it this way to encourage compiler to tail
624 call optimise. */
625 result = hv_common(hv, keysv, key, klen, flags,
626 HV_FETCH_ISSTORE
627 | HV_DISABLE_UVAR_XKEY
628 | return_svp,
629 newSV_type(SVt_NULL), hash);
630 } else {
631 if (flags & HVhek_FREEKEY)
632 Safefree(key);
633 }
634 return result;
635 }
636 }
637 #endif
638 } /* ISFETCH */
639 else if (SvRMAGICAL(hv) && (action & HV_FETCH_ISEXISTS)) {
640 if (mg_find((const SV *)hv, PERL_MAGIC_tied)
641 || SvGMAGICAL((const SV *)hv)) {
642 /* I don't understand why hv_exists_ent has svret and sv,
643 whereas hv_exists only had one. */
644 SV * const svret = sv_newmortal();
645 sv = sv_newmortal();
646
647 if (keysv || is_utf8) {
648 if (!keysv) {
649 keysv = newSVpvn_utf8(key, klen, TRUE);
650 } else {
651 keysv = newSVsv(keysv);
652 }
653 mg_copy(MUTABLE_SV(hv), sv, (char *)sv_2mortal(keysv), HEf_SVKEY);
654 } else {
655 mg_copy(MUTABLE_SV(hv), sv, key, klen);
656 }
657 if (flags & HVhek_FREEKEY)
658 Safefree(key);
659 {
660 MAGIC * const mg = mg_find(sv, PERL_MAGIC_tiedelem);
661 if (mg)
662 magic_existspack(svret, mg);
663 }
664 /* This cast somewhat evil, but I'm merely using NULL/
665 not NULL to return the boolean exists.
666 And I know hv is not NULL. */
667 return SvTRUE_NN(svret) ? (void *)hv : NULL;
668 }
669 #ifdef ENV_IS_CASELESS
670 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
671 /* XXX This code isn't UTF8 clean. */
672 char * const keysave = (char * const)key;
673 /* Will need to free this, so set FREEKEY flag. */
674 key = savepvn(key,klen);
675 key = (const char*)strupr((char*)key);
676 is_utf8 = FALSE;
677 hash = 0;
678 keysv = 0;
679
680 if (flags & HVhek_FREEKEY) {
681 Safefree(keysave);
682 }
683 flags |= HVhek_FREEKEY;
684 }
685 #endif
686 } /* ISEXISTS */
687 else if (action & HV_FETCH_ISSTORE) {
688 bool needs_copy;
689 bool needs_store;
690 hv_magic_check (hv, &needs_copy, &needs_store);
691 if (needs_copy) {
692 const bool save_taint = TAINT_get;
693 if (keysv || is_utf8) {
694 if (!keysv) {
695 keysv = newSVpvn_utf8(key, klen, TRUE);
696 }
697 if (TAINTING_get)
698 TAINT_set(SvTAINTED(keysv));
699 keysv = sv_2mortal(newSVsv(keysv));
700 mg_copy(MUTABLE_SV(hv), val, (char*)keysv, HEf_SVKEY);
701 } else {
702 mg_copy(MUTABLE_SV(hv), val, key, klen);
703 }
704
705 TAINT_IF(save_taint);
706 #ifdef NO_TAINT_SUPPORT
707 PERL_UNUSED_VAR(save_taint);
708 #endif
709 if (!needs_store) {
710 if (flags & HVhek_FREEKEY)
711 Safefree(key);
712 return NULL;
713 }
714 #ifdef ENV_IS_CASELESS
715 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
716 /* XXX This code isn't UTF8 clean. */
717 const char *keysave = key;
718 /* Will need to free this, so set FREEKEY flag. */
719 key = savepvn(key,klen);
720 key = (const char*)strupr((char*)key);
721 is_utf8 = FALSE;
722 hash = 0;
723 keysv = 0;
724
725 if (flags & HVhek_FREEKEY) {
726 Safefree(keysave);
727 }
728 flags |= HVhek_FREEKEY;
729 }
730 #endif
731 }
732 } /* ISSTORE */
733 } /* SvMAGICAL */
734
735 if (!HvARRAY(hv)) {
736 if ((action & (HV_FETCH_LVALUE | HV_FETCH_ISSTORE))
737 #ifdef DYNAMIC_ENV_FETCH /* if it's an %ENV lookup, we may get it on the fly */
738 || (SvRMAGICAL((const SV *)hv)
739 && mg_find((const SV *)hv, PERL_MAGIC_env))
740 #endif
741 ) {
742 char *array;
743 Newxz(array,
744 PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1 /* HvMAX(hv)+1 */),
745 char);
746 HvARRAY(hv) = (HE**)array;
747 }
748 #ifdef DYNAMIC_ENV_FETCH
749 else if (action & HV_FETCH_ISEXISTS) {
750 /* for an %ENV exists, if we do an insert it's by a recursive
751 store call, so avoid creating HvARRAY(hv) right now. */
752 }
753 #endif
754 else {
755 /* XXX remove at some point? */
756 if (flags & HVhek_FREEKEY)
757 Safefree(key);
758
759 return NULL;
760 }
761 }
762
763 if (is_utf8 && !(flags & HVhek_KEYCANONICAL)) {
764 char * const keysave = (char *)key;
765 key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
766 if (is_utf8)
767 flags |= HVhek_UTF8;
768 else
769 flags &= ~HVhek_UTF8;
770 if (key != keysave) {
771 if (flags & HVhek_FREEKEY)
772 Safefree(keysave);
773 flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
774 /* If the caller calculated a hash, it was on the sequence of
775 octets that are the UTF-8 form. We've now changed the sequence
776 of octets stored to that of the equivalent byte representation,
777 so the hash we need is different. */
778 hash = 0;
779 }
780 }
781
782 if (keysv && (SvIsCOW_shared_hash(keysv))) {
783 if (HvSHAREKEYS(hv))
784 keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
785 hash = SvSHARED_HASH(keysv);
786 }
787 else if (!hash)
788 PERL_HASH(hash, key, klen);
789
790 #ifdef DYNAMIC_ENV_FETCH
791 if (!HvARRAY(hv)) entry = NULL;
792 else
793 #endif
794 {
795 entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)];
796 }
797
798 if (!entry)
799 goto not_found;
800
801 if (keysv_hek) {
802 /* keysv is actually a HEK in disguise, so we can match just by
803 * comparing the HEK pointers in the HE chain. There is a slight
804 * caveat: on something like "\x80", which has both plain and utf8
805 * representations, perl's hashes do encoding-insensitive lookups,
806 * but preserve the encoding of the stored key. Thus a particular
807 * key could map to two different HEKs in PL_strtab. We only
808 * conclude 'not found' if all the flags are the same; otherwise
809 * we fall back to a full search (this should only happen in rare
810 * cases).
811 */
812 int keysv_flags = HEK_FLAGS(keysv_hek);
813 HE *orig_entry = entry;
814
815 for (; entry; entry = HeNEXT(entry)) {
816 HEK *hek = HeKEY_hek(entry);
817 if (hek == keysv_hek)
818 goto found;
819 if (HEK_FLAGS(hek) != keysv_flags)
820 break; /* need to do full match */
821 }
822 if (!entry)
823 goto not_found;
824 /* failed on shortcut - do full search loop */
825 entry = orig_entry;
826 }
827
828 for (; entry; entry = HeNEXT(entry)) {
829 if (HeHASH(entry) != hash) /* strings can't be equal */
830 continue;
831 if (HeKLEN(entry) != (I32)klen)
832 continue;
833 if (memNE(HeKEY(entry),key,klen)) /* is this it? */
834 continue;
835 if ((HeKFLAGS(entry) ^ flags) & HVhek_UTF8)
836 continue;
837
838 found:
839 if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) {
840 if ((HeKFLAGS(entry) ^ flags) & HVhek_WASUTF8) {
841 /* We match if HVhek_UTF8 bit in our flags and hash key's
842 match. But if entry was set previously with HVhek_WASUTF8
843 and key now doesn't (or vice versa) then we should change
844 the key's flag, as this is assignment. */
845 if ((HeKFLAGS(entry) & HVhek_NOTSHARED) == 0) {
846 /* Need to swap the key we have for a key with the flags we
847 need. As keys are shared we can't just write to the
848 flag, so we share the new one, unshare the old one. */
849 HEK * const new_hek
850 = share_hek_flags(key, klen, hash, flags & ~HVhek_FREEKEY);
851 unshare_hek (HeKEY_hek(entry));
852 HeKEY_hek(entry) = new_hek;
853 }
854 else if (hv == PL_strtab) {
855 /* PL_strtab is usually the only hash without HvSHAREKEYS,
856 so putting this test here is cheap */
857 if (flags & HVhek_FREEKEY)
858 Safefree(key);
859 Perl_croak(aTHX_ S_strtab_error,
860 action & HV_FETCH_LVALUE ? "fetch" : "store");
861 }
862 else {
863 /* Effectively this is save_hek_flags() for a new version
864 of the HEK and Safefree() of the old rolled together. */
865 HeKFLAGS(entry) ^= HVhek_WASUTF8;
866 }
867 if (flags & HVhek_ENABLEHVKFLAGS)
868 HvHASKFLAGS_on(hv);
869 }
870 if (HeVAL(entry) == &PL_sv_placeholder) {
871 /* yes, can store into placeholder slot */
872 if (action & HV_FETCH_LVALUE) {
873 if (SvMAGICAL(hv)) {
874 /* This preserves behaviour with the old hv_fetch
875 implementation which at this point would bail out
876 with a break; (at "if we find a placeholder, we
877 pretend we haven't found anything")
878
879 That break mean that if a placeholder were found, it
880 caused a call into hv_store, which in turn would
881 check magic, and if there is no magic end up pretty
882 much back at this point (in hv_store's code). */
883 break;
884 }
885 /* LVAL fetch which actually needs a store. */
886 val = newSV_type(SVt_NULL);
887 HvPLACEHOLDERS(hv)--;
888 } else {
889 /* store */
890 if (val != &PL_sv_placeholder)
891 HvPLACEHOLDERS(hv)--;
892 }
893 HeVAL(entry) = val;
894 } else if (action & HV_FETCH_ISSTORE) {
895 SvREFCNT_dec(HeVAL(entry));
896 HeVAL(entry) = val;
897 }
898 } else if (HeVAL(entry) == &PL_sv_placeholder) {
899 /* if we find a placeholder, we pretend we haven't found
900 anything */
901 break;
902 }
903 if (flags & HVhek_FREEKEY)
904 Safefree(key);
905 if (return_svp) {
906 return (void *) &HeVAL(entry);
907 }
908 return entry;
909 }
910
911 not_found:
912 #ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
913 if (!(action & HV_FETCH_ISSTORE)
914 && SvRMAGICAL((const SV *)hv)
915 && mg_find((const SV *)hv, PERL_MAGIC_env)) {
916 unsigned long len;
917 const char * const env = PerlEnv_ENVgetenv_len(key,&len);
918 if (env) {
919 sv = newSVpvn(env,len);
920 SvTAINTED_on(sv);
921 return hv_common(hv, keysv, key, klen, flags,
922 HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp,
923 sv, hash);
924 }
925 }
926 #endif
927
928 if (!entry && SvREADONLY(hv) && !(action & HV_FETCH_ISEXISTS)) {
929 hv_notallowed(flags, key, klen,
930 "Attempt to access disallowed key '%" SVf "' in"
931 " a restricted hash");
932 }
933 if (!(action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE))) {
934 /* Not doing some form of store, so return failure. */
935 if (flags & HVhek_FREEKEY)
936 Safefree(key);
937 return NULL;
938 }
939 if (action & HV_FETCH_LVALUE) {
940 val = action & HV_FETCH_EMPTY_HE ? NULL : newSV_type(SVt_NULL);
941 if (SvMAGICAL(hv)) {
942 /* At this point the old hv_fetch code would call to hv_store,
943 which in turn might do some tied magic. So we need to make that
944 magic check happen. */
945 /* gonna assign to this, so it better be there */
946 /* If a fetch-as-store fails on the fetch, then the action is to
947 recurse once into "hv_store". If we didn't do this, then that
948 recursive call would call the key conversion routine again.
949 However, as we replace the original key with the converted
950 key, this would result in a double conversion, which would show
951 up as a bug if the conversion routine is not idempotent.
952 Hence the use of HV_DISABLE_UVAR_XKEY. */
953 return hv_common(hv, keysv, key, klen, flags,
954 HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp,
955 val, hash);
956 /* XXX Surely that could leak if the fetch-was-store fails?
957 Just like the hv_fetch. */
958 }
959 }
960
961 /* Welcome to hv_store... */
962
963 if (!HvARRAY(hv)) {
964 /* Not sure if we can get here. I think the only case of oentry being
965 NULL is for %ENV with dynamic env fetch. But that should disappear
966 with magic in the previous code. */
967 char *array;
968 Newxz(array,
969 PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1 /* HvMAX(hv)+1 */),
970 char);
971 HvARRAY(hv) = (HE**)array;
972 }
973
974 oentry = &(HvARRAY(hv))[hash & (I32) xhv->xhv_max];
975
976 /* share_hek_flags will do the free for us. This might be considered
977 bad API design. */
978 if (LIKELY(HvSHAREKEYS(hv))) {
979 entry = new_HE();
980 HeKEY_hek(entry) = share_hek_flags(key, klen, hash, flags);
981 }
982 else if (UNLIKELY(hv == PL_strtab)) {
983 /* PL_strtab is usually the only hash without HvSHAREKEYS, so putting
984 this test here is cheap */
985 if (flags & HVhek_FREEKEY)
986 Safefree(key);
987 Perl_croak(aTHX_ S_strtab_error,
988 action & HV_FETCH_LVALUE ? "fetch" : "store");
989 }
990 else {
991 /* gotta do the real thing */
992 entry = new_HE();
993 HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
994 }
995 HeVAL(entry) = val;
996 in_collision = cBOOL(*oentry != NULL);
997
998
999 #ifdef PERL_HASH_RANDOMIZE_KEYS
1000 /* This logic semi-randomizes the insert order in a bucket.
1001 * Either we insert into the top, or the slot below the top,
1002 * making it harder to see if there is a collision. We also
1003 * reset the iterator randomizer if there is one.
1004 */
1005
1006
1007 if ( *oentry && PL_HASH_RAND_BITS_ENABLED) {
1008 UPDATE_HASH_RAND_BITS_KEY(key,klen);
1009 if ( PL_hash_rand_bits & 1 ) {
1010 HeNEXT(entry) = HeNEXT(*oentry);
1011 HeNEXT(*oentry) = entry;
1012 } else {
1013 HeNEXT(entry) = *oentry;
1014 *oentry = entry;
1015 }
1016 } else
1017 #endif
1018 {
1019 HeNEXT(entry) = *oentry;
1020 *oentry = entry;
1021 }
1022 #ifdef PERL_HASH_RANDOMIZE_KEYS
1023 if (HvHasAUX(hv)) {
1024 /* Currently this makes various tests warn in annoying ways.
1025 * So Silenced for now. - Yves | bogus end of comment =>* /
1026 if (HvAUX(hv)->xhv_riter != -1) {
1027 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
1028 "[TESTING] Inserting into a hash during each() traversal results in undefined behavior"
1029 pTHX__FORMAT
1030 pTHX__VALUE);
1031 }
1032 */
1033 MAYBE_UPDATE_HASH_RAND_BITS_KEY(key,klen);
1034 HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits;
1035 }
1036 #endif
1037
1038 if (val == &PL_sv_placeholder)
1039 HvPLACEHOLDERS(hv)++;
1040 if (flags & HVhek_ENABLEHVKFLAGS)
1041 HvHASKFLAGS_on(hv);
1042
1043 xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
1044 if ( in_collision && DO_HSPLIT(xhv) ) {
1045 const STRLEN oldsize = xhv->xhv_max + 1;
1046 const U32 items = (U32)HvPLACEHOLDERS_get(hv);
1047
1048 if (items /* hash has placeholders */
1049 && !SvREADONLY(hv) /* but is not a restricted hash */) {
1050 /* If this hash previously was a "restricted hash" and had
1051 placeholders, but the "restricted" flag has been turned off,
1052 then the placeholders no longer serve any useful purpose.
1053 However, they have the downsides of taking up RAM, and adding
1054 extra steps when finding used values. It's safe to clear them
1055 at this point, even though Storable rebuilds restricted hashes by
1056 putting in all the placeholders (first) before turning on the
1057 readonly flag, because Storable always pre-splits the hash.
1058 If we're lucky, then we may clear sufficient placeholders to
1059 avoid needing to split the hash at all. */
1060 clear_placeholders(hv, items);
1061 if (DO_HSPLIT(xhv))
1062 hsplit(hv, oldsize, oldsize * 2);
1063 } else
1064 hsplit(hv, oldsize, oldsize * 2);
1065 }
1066
1067 if (return_svp) {
1068 return entry ? (void *) &HeVAL(entry) : NULL;
1069 }
1070 return (void *) entry;
1071 }
1072
1073 STATIC void
S_hv_magic_check(HV * hv,bool * needs_copy,bool * needs_store)1074 S_hv_magic_check(HV *hv, bool *needs_copy, bool *needs_store)
1075 {
1076 const MAGIC *mg = SvMAGIC(hv);
1077
1078 PERL_ARGS_ASSERT_HV_MAGIC_CHECK;
1079
1080 *needs_copy = FALSE;
1081 *needs_store = TRUE;
1082 while (mg) {
1083 if (isUPPER(mg->mg_type)) {
1084 *needs_copy = TRUE;
1085 if (mg->mg_type == PERL_MAGIC_tied) {
1086 *needs_store = FALSE;
1087 return; /* We've set all there is to set. */
1088 }
1089 }
1090 mg = mg->mg_moremagic;
1091 }
1092 }
1093
1094 /*
1095 =for apidoc hv_scalar
1096
1097 Evaluates the hash in scalar context and returns the result.
1098
1099 When the hash is tied dispatches through to the SCALAR method,
1100 otherwise returns a mortal SV containing the number of keys
1101 in the hash.
1102
1103 Note, prior to 5.25 this function returned what is now
1104 returned by the hv_bucket_ratio() function.
1105
1106 =cut
1107 */
1108
1109 SV *
Perl_hv_scalar(pTHX_ HV * hv)1110 Perl_hv_scalar(pTHX_ HV *hv)
1111 {
1112 SV *sv;
1113 UV u;
1114
1115 PERL_ARGS_ASSERT_HV_SCALAR;
1116
1117 if (SvRMAGICAL(hv)) {
1118 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_tied);
1119 if (mg)
1120 return magic_scalarpack(hv, mg);
1121 }
1122
1123 sv = newSV_type_mortal(SVt_IV);
1124
1125 /* Inlined sv_setuv(sv, HvUSEDKEYS(hv)) follows:*/
1126 u = HvUSEDKEYS(hv);
1127
1128 if (u <= (UV)IV_MAX) {
1129 SvIV_set(sv, (IV)u);
1130 (void)SvIOK_only(sv);
1131 SvTAINT(sv);
1132 } else {
1133 SvIV_set(sv, 0);
1134 SvUV_set(sv, u);
1135 (void)SvIOK_only_UV(sv);
1136 SvTAINT(sv);
1137 }
1138
1139 return sv;
1140 }
1141
1142
1143 /*
1144 hv_pushkv(): push all the keys and/or values of a hash onto the stack.
1145 The rough Perl equivalents:
1146 () = %hash;
1147 () = keys %hash;
1148 () = values %hash;
1149
1150 Resets the hash's iterator.
1151
1152 flags : 1 = push keys
1153 2 = push values
1154 1|2 = push keys and values
1155 XXX use symbolic flag constants at some point?
1156 I might unroll the non-tied hv_iternext() in here at some point - DAPM
1157 */
1158
1159 void
Perl_hv_pushkv(pTHX_ HV * hv,U32 flags)1160 Perl_hv_pushkv(pTHX_ HV *hv, U32 flags)
1161 {
1162 HE *entry;
1163 bool tied = SvRMAGICAL(hv) && (mg_find(MUTABLE_SV(hv), PERL_MAGIC_tied)
1164 #ifdef DYNAMIC_ENV_FETCH /* might not know number of keys yet */
1165 || mg_find(MUTABLE_SV(hv), PERL_MAGIC_env)
1166 #endif
1167 );
1168 PERL_ARGS_ASSERT_HV_PUSHKV;
1169 assert(flags); /* must be pushing at least one of keys and values */
1170
1171 (void)hv_iterinit(hv);
1172
1173 if (tied) {
1174 SSize_t ext = (flags == 3) ? 2 : 1;
1175 while ((entry = hv_iternext(hv))) {
1176 rpp_extend(ext);
1177 if (flags & 1)
1178 rpp_push_1(hv_iterkeysv(entry));
1179 if (flags & 2)
1180 rpp_push_1(hv_iterval(hv, entry));
1181 }
1182 }
1183 else {
1184 Size_t nkeys = HvUSEDKEYS(hv);
1185 SSize_t ext;
1186
1187 if (!nkeys)
1188 return;
1189
1190 /* 2*nkeys() should never be big enough to truncate or wrap */
1191 assert(nkeys <= (SSize_t_MAX >> 1));
1192 ext = nkeys * ((flags == 3) ? 2 : 1);
1193
1194 EXTEND_MORTAL(nkeys);
1195 rpp_extend(ext);
1196
1197 while ((entry = hv_iternext(hv))) {
1198 if (flags & 1) {
1199 SV *keysv = newSVhek(HeKEY_hek(entry));
1200 SvTEMP_on(keysv);
1201 PL_tmps_stack[++PL_tmps_ix] = keysv;
1202 rpp_push_1(keysv);
1203 }
1204 if (flags & 2)
1205 rpp_push_1(HeVAL(entry));
1206 }
1207 }
1208 }
1209
1210
1211 /*
1212 =for apidoc hv_bucket_ratio
1213
1214 If the hash is tied dispatches through to the SCALAR tied method,
1215 otherwise if the hash contains no keys returns 0, otherwise returns
1216 a mortal sv containing a string specifying the number of used buckets,
1217 followed by a slash, followed by the number of available buckets.
1218
1219 This function is expensive, it must scan all of the buckets
1220 to determine which are used, and the count is NOT cached.
1221 In a large hash this could be a lot of buckets.
1222
1223 =cut
1224 */
1225
1226 SV *
Perl_hv_bucket_ratio(pTHX_ HV * hv)1227 Perl_hv_bucket_ratio(pTHX_ HV *hv)
1228 {
1229 SV *sv;
1230
1231 PERL_ARGS_ASSERT_HV_BUCKET_RATIO;
1232
1233 if (SvRMAGICAL(hv)) {
1234 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_tied);
1235 if (mg)
1236 return magic_scalarpack(hv, mg);
1237 }
1238
1239 if (HvUSEDKEYS((HV *)hv)) {
1240 sv = sv_newmortal();
1241 Perl_sv_setpvf(aTHX_ sv, "%ld/%ld",
1242 (long)HvFILL(hv), (long)HvMAX(hv) + 1);
1243 }
1244 else
1245 sv = &PL_sv_zero;
1246
1247 return sv;
1248 }
1249
1250 /*
1251 =for apidoc hv_delete
1252 =for apidoc_item ||hv_deletes|HV *hv|"key"|U32 flags
1253
1254 These delete a key/value pair in the hash. The value's SV is removed from
1255 the hash, made mortal, and returned to the caller.
1256
1257 In C<hv_deletes>, the key must be a C language string literal, enclosed in
1258 double quotes. It is never treated as being in UTF-8. There is no
1259 length_parameter.
1260
1261 In C<hv_delete>, the absolute value of C<klen> is the length of the key. If
1262 C<klen> is negative the key is assumed to be in UTF-8-encoded Unicode.
1263
1264 In both, the C<flags> value will normally be zero; if set to C<G_DISCARD> then
1265 C<NULL> will be returned. C<NULL> will also be returned if the key is not
1266 found.
1267
1268 =for apidoc hv_delete_ent
1269
1270 Deletes a key/value pair in the hash. The value SV is removed from the hash,
1271 made mortal, and returned to the caller. The C<flags> value will normally be
1272 zero; if set to C<G_DISCARD> then C<NULL> will be returned. C<NULL> will also
1273 be returned if the key is not found. C<hash> can be a valid precomputed hash
1274 value, or 0 to ask for it to be computed.
1275
1276 =cut
1277 */
1278
1279 STATIC SV *
S_hv_delete_common(pTHX_ HV * hv,SV * keysv,const char * key,STRLEN klen,int k_flags,I32 d_flags,U32 hash)1280 S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
1281 int k_flags, I32 d_flags, U32 hash)
1282 {
1283 XPVHV* xhv;
1284 HE *entry;
1285 HE **oentry;
1286 HE **first_entry;
1287 bool is_utf8 = cBOOL(k_flags & HVhek_UTF8);
1288 HEK *keysv_hek = NULL;
1289 U8 mro_changes = 0; /* 1 = isa; 2 = package moved */
1290 SV *sv;
1291 GV *gv = NULL;
1292 HV *stash = NULL;
1293
1294 if (SvMAGICAL(hv)) {
1295 bool needs_copy;
1296 bool needs_store;
1297 hv_magic_check (hv, &needs_copy, &needs_store);
1298
1299 if (needs_copy) {
1300 SV *sv;
1301 entry = (HE *) hv_common(hv, keysv, key, klen,
1302 k_flags & ~HVhek_FREEKEY,
1303 HV_FETCH_LVALUE|HV_DISABLE_UVAR_XKEY,
1304 NULL, hash);
1305 sv = entry ? HeVAL(entry) : NULL;
1306 if (sv) {
1307 if (SvMAGICAL(sv)) {
1308 mg_clear(sv);
1309 }
1310 if (!needs_store) {
1311 if (mg_find(sv, PERL_MAGIC_tiedelem)) {
1312 /* No longer an element */
1313 sv_unmagic(sv, PERL_MAGIC_tiedelem);
1314 return sv;
1315 }
1316 return NULL; /* element cannot be deleted */
1317 }
1318 #ifdef ENV_IS_CASELESS
1319 else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
1320 /* XXX This code isn't UTF8 clean. */
1321 keysv = newSVpvn_flags(key, klen, SVs_TEMP);
1322 if (k_flags & HVhek_FREEKEY) {
1323 Safefree(key);
1324 }
1325 key = strupr(SvPVX(keysv));
1326 is_utf8 = 0;
1327 k_flags = 0;
1328 hash = 0;
1329 }
1330 #endif
1331 }
1332 }
1333 }
1334 xhv = (XPVHV*)SvANY(hv);
1335 if (!HvTOTALKEYS(hv))
1336 return NULL;
1337
1338 if (is_utf8 && !(k_flags & HVhek_KEYCANONICAL)) {
1339 const char * const keysave = key;
1340 key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
1341
1342 if (is_utf8)
1343 k_flags |= HVhek_UTF8;
1344 else
1345 k_flags &= ~HVhek_UTF8;
1346 if (key != keysave) {
1347 if (k_flags & HVhek_FREEKEY) {
1348 /* This shouldn't happen if our caller does what we expect,
1349 but strictly the API allows it. */
1350 Safefree(keysave);
1351 }
1352 k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
1353 }
1354 }
1355
1356 if (keysv && (SvIsCOW_shared_hash(keysv))) {
1357 if (HvSHAREKEYS(hv))
1358 keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
1359 hash = SvSHARED_HASH(keysv);
1360 }
1361 else if (!hash)
1362 PERL_HASH(hash, key, klen);
1363
1364 first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
1365 entry = *oentry;
1366
1367 if (!entry)
1368 goto not_found;
1369
1370 if (keysv_hek) {
1371 /* keysv is actually a HEK in disguise, so we can match just by
1372 * comparing the HEK pointers in the HE chain. There is a slight
1373 * caveat: on something like "\x80", which has both plain and utf8
1374 * representations, perl's hashes do encoding-insensitive lookups,
1375 * but preserve the encoding of the stored key. Thus a particular
1376 * key could map to two different HEKs in PL_strtab. We only
1377 * conclude 'not found' if all the flags are the same; otherwise
1378 * we fall back to a full search (this should only happen in rare
1379 * cases).
1380 */
1381 int keysv_flags = HEK_FLAGS(keysv_hek);
1382
1383 for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
1384 HEK *hek = HeKEY_hek(entry);
1385 if (hek == keysv_hek)
1386 goto found;
1387 if (HEK_FLAGS(hek) != keysv_flags)
1388 break; /* need to do full match */
1389 }
1390 if (!entry)
1391 goto not_found;
1392 /* failed on shortcut - do full search loop */
1393 oentry = first_entry;
1394 entry = *oentry;
1395 }
1396
1397 for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
1398 if (HeHASH(entry) != hash) /* strings can't be equal */
1399 continue;
1400 if (HeKLEN(entry) != (I32)klen)
1401 continue;
1402 if (memNE(HeKEY(entry),key,klen)) /* is this it? */
1403 continue;
1404 if ((HeKFLAGS(entry) ^ k_flags) & HVhek_UTF8)
1405 continue;
1406
1407 found:
1408 if (hv == PL_strtab) {
1409 if (k_flags & HVhek_FREEKEY)
1410 Safefree(key);
1411 Perl_croak(aTHX_ S_strtab_error, "delete");
1412 }
1413
1414 sv = HeVAL(entry);
1415
1416 /* if placeholder is here, it's already been deleted.... */
1417 if (sv == &PL_sv_placeholder) {
1418 if (k_flags & HVhek_FREEKEY)
1419 Safefree(key);
1420 return NULL;
1421 }
1422 if (SvREADONLY(hv) && sv && SvREADONLY(sv)) {
1423 hv_notallowed(k_flags, key, klen,
1424 "Attempt to delete readonly key '%" SVf "' from"
1425 " a restricted hash");
1426 }
1427
1428 /*
1429 * If a restricted hash, rather than really deleting the entry, put
1430 * a placeholder there. This marks the key as being "approved", so
1431 * we can still access via not-really-existing key without raising
1432 * an error.
1433 */
1434 if (SvREADONLY(hv)) {
1435 /* We'll be saving this slot, so the number of allocated keys
1436 * doesn't go down, but the number placeholders goes up */
1437 HeVAL(entry) = &PL_sv_placeholder;
1438 HvPLACEHOLDERS(hv)++;
1439 }
1440 else {
1441 HeVAL(entry) = NULL;
1442 *oentry = HeNEXT(entry);
1443 if (HvHasAUX(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */) {
1444 HvLAZYDEL_on(hv);
1445 }
1446 else {
1447 if (HvHasAUX(hv) && HvLAZYDEL(hv) &&
1448 entry == HeNEXT(HvAUX(hv)->xhv_eiter))
1449 HeNEXT(HvAUX(hv)->xhv_eiter) = HeNEXT(entry);
1450 hv_free_ent(NULL, entry);
1451 }
1452 xhv->xhv_keys--; /* HvTOTALKEYS(hv)-- */
1453 if (xhv->xhv_keys == 0)
1454 HvHASKFLAGS_off(hv);
1455 }
1456
1457 /* If this is a stash and the key ends with ::, then someone is
1458 * deleting a package.
1459 */
1460 if (sv && SvTYPE(sv) == SVt_PVGV && HvHasENAME(hv)) {
1461 gv = (GV *)sv;
1462 if ((
1463 (klen > 1 && key[klen-2] == ':' && key[klen-1] == ':')
1464 ||
1465 (klen == 1 && key[0] == ':')
1466 )
1467 && (klen != 6 || hv!=PL_defstash || memNE(key,"main::",6))
1468 && (stash = GvHV((GV *)gv))
1469 && HvHasENAME(stash)) {
1470 /* A previous version of this code checked that the
1471 * GV was still in the symbol table by fetching the
1472 * GV with its name. That is not necessary (and
1473 * sometimes incorrect), as HvENAME cannot be set
1474 * on hv if it is not in the symtab. */
1475 mro_changes = 2;
1476 /* Hang on to it for a bit. */
1477 SvREFCNT_inc_simple_void_NN(
1478 sv_2mortal((SV *)gv)
1479 );
1480 }
1481 else if (memEQs(key, klen, "ISA") && GvAV(gv)) {
1482 AV *isa = GvAV(gv);
1483 MAGIC *mg = mg_find((SV*)isa, PERL_MAGIC_isa);
1484
1485 mro_changes = 1;
1486 if (mg) {
1487 if (mg->mg_obj == (SV*)gv) {
1488 /* This is the only stash this ISA was used for.
1489 * The isaelem magic asserts if there's no
1490 * isa magic on the array, so explicitly
1491 * remove the magic on both the array and its
1492 * elements. @ISA shouldn't be /too/ large.
1493 */
1494 SV **svp, **end;
1495 strip_magic:
1496 svp = AvARRAY(isa);
1497 if (svp) {
1498 end = svp + (AvFILLp(isa)+1);
1499 while (svp < end) {
1500 if (*svp)
1501 mg_free_type(*svp, PERL_MAGIC_isaelem);
1502 ++svp;
1503 }
1504 }
1505 mg_free_type((SV*)GvAV(gv), PERL_MAGIC_isa);
1506 }
1507 else {
1508 /* mg_obj is an array of stashes
1509 Note that the array doesn't keep a reference
1510 count on the stashes.
1511 */
1512 AV *av = (AV*)mg->mg_obj;
1513 SV **svp, **arrayp;
1514 SSize_t index;
1515 SSize_t items;
1516
1517 assert(SvTYPE(mg->mg_obj) == SVt_PVAV);
1518
1519 /* remove the stash from the magic array */
1520 arrayp = svp = AvARRAY(av);
1521 items = AvFILLp(av) + 1;
1522 if (items == 1) {
1523 assert(*arrayp == (SV *)gv);
1524 mg->mg_obj = NULL;
1525 /* avoid a double free on the last stash */
1526 AvFILLp(av) = -1;
1527 /* The magic isn't MGf_REFCOUNTED, so release
1528 * the array manually.
1529 */
1530 SvREFCNT_dec_NN(av);
1531 goto strip_magic;
1532 }
1533 else {
1534 while (items--) {
1535 if (*svp == (SV*)gv)
1536 break;
1537 ++svp;
1538 }
1539 index = svp - arrayp;
1540 assert(index >= 0 && index <= AvFILLp(av));
1541 if (index < AvFILLp(av)) {
1542 arrayp[index] = arrayp[AvFILLp(av)];
1543 }
1544 arrayp[AvFILLp(av)] = NULL;
1545 --AvFILLp(av);
1546 }
1547 }
1548 }
1549 }
1550 }
1551
1552 if (k_flags & HVhek_FREEKEY)
1553 Safefree(key);
1554
1555 if (sv) {
1556 /* deletion of method from stash */
1557 if (isGV(sv) && isGV_with_GP(sv) && GvCVu(sv)
1558 && HvHasENAME(hv))
1559 mro_method_changed_in(hv);
1560
1561 if (d_flags & G_DISCARD) {
1562 SvREFCNT_dec(sv);
1563 sv = NULL;
1564 }
1565 else {
1566 sv_2mortal(sv);
1567 }
1568 }
1569
1570 if (mro_changes == 1) mro_isa_changed_in(hv);
1571 else if (mro_changes == 2)
1572 mro_package_moved(NULL, stash, gv, 1);
1573
1574 return sv;
1575 }
1576
1577 not_found:
1578 if (SvREADONLY(hv)) {
1579 hv_notallowed(k_flags, key, klen,
1580 "Attempt to delete disallowed key '%" SVf "' from"
1581 " a restricted hash");
1582 }
1583
1584 if (k_flags & HVhek_FREEKEY)
1585 Safefree(key);
1586 return NULL;
1587 }
1588
1589 /* HVs are used for (at least) three things
1590 1) objects
1591 2) symbol tables
1592 3) associative arrays
1593
1594 shared hash keys benefit the first two greatly, because keys are likely
1595 to be re-used between objects, or for constants in the optree
1596
1597 However, for large associative arrays (lookup tables, "seen" hashes) keys are
1598 unlikely to be re-used. Hence having those keys in the shared string table as
1599 well as the hash is a memory hit, if they are never actually shared with a
1600 second hash. Hence we turn off shared hash keys if a (regular) hash gets
1601 large.
1602
1603 This is a heuristic. There might be a better answer than 42, but for now
1604 we'll use it.
1605
1606 NOTE: Configure with -Accflags='-DPERL_USE_UNSHARED_KEYS_IN_LARGE_HASHES'
1607 to enable this new functionality.
1608 */
1609
1610 #ifdef PERL_USE_UNSHARED_KEYS_IN_LARGE_HASHES
1611 static bool
S_large_hash_heuristic(pTHX_ HV * hv,STRLEN size)1612 S_large_hash_heuristic(pTHX_ HV *hv, STRLEN size) {
1613 if (size > 42
1614 && !SvOBJECT(hv)
1615 && !(HvHasAUX(hv) && HvENAME_get(hv))) {
1616 /* This hash appears to be growing quite large.
1617 We gamble that it is not sharing keys with other hashes. */
1618 return TRUE;
1619 }
1620 return FALSE;
1621 }
1622 #endif
1623
1624 STATIC void
S_hsplit(pTHX_ HV * hv,STRLEN const oldsize,STRLEN newsize)1625 S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
1626 {
1627 STRLEN i = 0;
1628 char *a = (char*) HvARRAY(hv);
1629 HE **aep;
1630
1631 PERL_ARGS_ASSERT_HSPLIT;
1632 if (newsize > MAX_BUCKET_MAX+1)
1633 return;
1634
1635 PL_nomemok = TRUE;
1636 Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
1637 PL_nomemok = FALSE;
1638 if (!a) {
1639 return;
1640 }
1641
1642 #ifdef PERL_HASH_RANDOMIZE_KEYS
1643 /* the idea of this is that we create a "random" value by hashing the address of
1644 * the array, we then use the low bit to decide if we insert at the top, or insert
1645 * second from top. After each such insert we rotate the hashed value. So we can
1646 * use the same hashed value over and over, and in normal build environments use
1647 * very few ops to do so. ROTL32() should produce a single machine operation. */
1648 MAYBE_UPDATE_HASH_RAND_BITS();
1649 #endif
1650 HvARRAY(hv) = (HE**) a;
1651 HvMAX(hv) = newsize - 1;
1652 /* now we can safely clear the second half */
1653 Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/
1654
1655 if (!HvTOTALKEYS(hv)) /* skip rest if no entries */
1656 return;
1657
1658 /* don't share keys in large simple hashes */
1659 if (LARGE_HASH_HEURISTIC(hv, HvTOTALKEYS(hv)))
1660 HvSHAREKEYS_off(hv);
1661
1662
1663 newsize--;
1664 aep = (HE**)a;
1665 do {
1666 HE **oentry = aep + i;
1667 HE *entry = aep[i];
1668
1669 if (!entry) /* non-existent */
1670 continue;
1671 do {
1672 U32 j = (HeHASH(entry) & newsize);
1673 if (j != (U32)i) {
1674 *oentry = HeNEXT(entry);
1675 #ifdef PERL_HASH_RANDOMIZE_KEYS
1676 /* if the target cell is empty or PL_HASH_RAND_BITS_ENABLED is false
1677 * insert to top, otherwise rotate the bucket rand 1 bit,
1678 * and use the new low bit to decide if we insert at top,
1679 * or next from top. IOW, we only rotate on a collision.*/
1680 if (aep[j] && PL_HASH_RAND_BITS_ENABLED) {
1681 UPDATE_HASH_RAND_BITS();
1682 if (PL_hash_rand_bits & 1) {
1683 HeNEXT(entry)= HeNEXT(aep[j]);
1684 HeNEXT(aep[j])= entry;
1685 } else {
1686 /* Note, this is structured in such a way as the optimizer
1687 * should eliminate the duplicated code here and below without
1688 * us needing to explicitly use a goto. */
1689 HeNEXT(entry) = aep[j];
1690 aep[j] = entry;
1691 }
1692 } else
1693 #endif
1694 {
1695 /* see comment above about duplicated code */
1696 HeNEXT(entry) = aep[j];
1697 aep[j] = entry;
1698 }
1699 }
1700 else {
1701 oentry = &HeNEXT(entry);
1702 }
1703 entry = *oentry;
1704 } while (entry);
1705 } while (i++ < oldsize);
1706 }
1707
1708 /*
1709 =for apidoc hv_ksplit
1710
1711 Attempt to grow the hash C<hv> so it has at least C<newmax> buckets available.
1712 Perl chooses the actual number for its convenience.
1713
1714 This is the same as doing the following in Perl code:
1715
1716 keys %hv = newmax;
1717
1718 =cut
1719 */
1720
1721 void
Perl_hv_ksplit(pTHX_ HV * hv,IV newmax)1722 Perl_hv_ksplit(pTHX_ HV *hv, IV newmax)
1723 {
1724 XPVHV* xhv = (XPVHV*)SvANY(hv);
1725 const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 */
1726 I32 newsize;
1727 I32 wantsize;
1728 I32 trysize;
1729 char *a;
1730
1731 PERL_ARGS_ASSERT_HV_KSPLIT;
1732
1733 wantsize = (I32) newmax; /* possible truncation here */
1734 if (wantsize != newmax)
1735 return;
1736
1737 wantsize= wantsize + (wantsize >> 1); /* wantsize *= 1.5 */
1738 if (wantsize < newmax) /* overflow detection */
1739 return;
1740
1741 newsize = oldsize;
1742 while (wantsize > newsize) {
1743 trysize = newsize << 1;
1744 if (trysize > newsize) {
1745 newsize = trysize;
1746 } else {
1747 /* we overflowed */
1748 return;
1749 }
1750 }
1751
1752 if (newsize <= oldsize)
1753 return; /* overflow detection */
1754
1755 a = (char *) HvARRAY(hv);
1756 if (a) {
1757 #ifdef PERL_HASH_RANDOMIZE_KEYS
1758 U32 was_ook = HvHasAUX(hv);
1759 #endif
1760 hsplit(hv, oldsize, newsize);
1761 #ifdef PERL_HASH_RANDOMIZE_KEYS
1762 if (was_ook && HvHasAUX(hv) && HvTOTALKEYS(hv)) {
1763 MAYBE_UPDATE_HASH_RAND_BITS();
1764 HvAUX(hv)->xhv_rand = (U32)PL_hash_rand_bits;
1765 }
1766 #endif
1767 } else {
1768 if (LARGE_HASH_HEURISTIC(hv, newmax))
1769 HvSHAREKEYS_off(hv);
1770 Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
1771 xhv->xhv_max = newsize - 1;
1772 HvARRAY(hv) = (HE **) a;
1773 }
1774 }
1775
1776 /* IMO this should also handle cases where hv_max is smaller than hv_keys
1777 * as tied hashes could play silly buggers and mess us around. We will
1778 * do the right thing during hv_store() afterwards, but still - Yves */
1779 #define HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys) STMT_START {\
1780 /* Can we use fewer buckets? (hv_max is always 2^n-1) */ \
1781 if (hv_max < PERL_HASH_DEFAULT_HvMAX) { \
1782 hv_max = PERL_HASH_DEFAULT_HvMAX; \
1783 } else { \
1784 while (hv_max > PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \
1785 hv_max = hv_max / 2; \
1786 } \
1787 HvMAX(hv) = hv_max; \
1788 } STMT_END
1789
1790
1791 /*
1792 =for apidoc newHVhv
1793
1794 The content of C<ohv> is copied to a new hash. A pointer to the new hash is
1795 returned.
1796
1797 =cut
1798 */
1799
1800 HV *
Perl_newHVhv(pTHX_ HV * ohv)1801 Perl_newHVhv(pTHX_ HV *ohv)
1802 {
1803 HV * const hv = newHV();
1804 STRLEN hv_max;
1805
1806 if (!ohv || (!HvTOTALKEYS(ohv) && !SvMAGICAL((const SV *)ohv)))
1807 return hv;
1808 hv_max = HvMAX(ohv);
1809
1810 if (!SvMAGICAL((const SV *)ohv)) {
1811 /* It's an ordinary hash, so copy it fast. AMS 20010804 */
1812 STRLEN i;
1813 HE **ents, ** const oents = (HE **)HvARRAY(ohv);
1814 char *a;
1815 Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(hv_max+1), char);
1816 ents = (HE**)a;
1817
1818 if (HvSHAREKEYS(ohv)) {
1819 #ifdef NODEFAULT_SHAREKEYS
1820 HvSHAREKEYS_on(hv);
1821 #else
1822 /* Shared is the default - it should have been set by newHV(). */
1823 assert(HvSHAREKEYS(hv));
1824 #endif
1825 }
1826 else {
1827 HvSHAREKEYS_off(hv);
1828 }
1829
1830 /* In each bucket... */
1831 for (i = 0; i <= hv_max; i++) {
1832 HE *prev = NULL;
1833 HE *oent = oents[i];
1834
1835 if (!oent) {
1836 ents[i] = NULL;
1837 continue;
1838 }
1839
1840 /* Copy the linked list of entries. */
1841 for (; oent; oent = HeNEXT(oent)) {
1842 HE * const ent = new_HE();
1843 SV *const val = HeVAL(oent);
1844 const int flags = HeKFLAGS(oent);
1845
1846 HeVAL(ent) = SvIMMORTAL(val) ? val : newSVsv(val);
1847 if ((flags & HVhek_NOTSHARED) == 0) {
1848 HeKEY_hek(ent) = share_hek_hek(HeKEY_hek(oent));
1849 }
1850 else {
1851 const U32 hash = HeHASH(oent);
1852 const char * const key = HeKEY(oent);
1853 const STRLEN len = HeKLEN(oent);
1854 HeKEY_hek(ent) = save_hek_flags(key, len, hash, flags);
1855 }
1856 if (prev)
1857 HeNEXT(prev) = ent;
1858 else
1859 ents[i] = ent;
1860 prev = ent;
1861 HeNEXT(ent) = NULL;
1862 }
1863 }
1864
1865 HvMAX(hv) = hv_max;
1866 HvTOTALKEYS(hv) = HvTOTALKEYS(ohv);
1867 HvARRAY(hv) = ents;
1868 } /* not magical */
1869 else {
1870 /* Iterate over ohv, copying keys and values one at a time. */
1871 HE *entry;
1872 const I32 riter = HvRITER_get(ohv);
1873 HE * const eiter = HvEITER_get(ohv);
1874 STRLEN hv_keys = HvTOTALKEYS(ohv);
1875
1876 HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
1877
1878 hv_iterinit(ohv);
1879 while ((entry = hv_iternext_flags(ohv, 0))) {
1880 SV *val = hv_iterval(ohv,entry);
1881 SV * const keysv = HeSVKEY(entry);
1882 val = SvIMMORTAL(val) ? val : newSVsv(val);
1883 if (keysv)
1884 (void)hv_store_ent(hv, keysv, val, 0);
1885 else
1886 (void)hv_store_flags(hv, HeKEY(entry), HeKLEN(entry), val,
1887 HeHASH(entry), HeKFLAGS(entry));
1888 }
1889 HvRITER_set(ohv, riter);
1890 HvEITER_set(ohv, eiter);
1891 }
1892
1893 return hv;
1894 }
1895
1896 /*
1897 =for apidoc hv_copy_hints_hv
1898
1899 A specialised version of L</newHVhv> for copying C<%^H>. C<ohv> must be
1900 a pointer to a hash (which may have C<%^H> magic, but should be generally
1901 non-magical), or C<NULL> (interpreted as an empty hash). The content
1902 of C<ohv> is copied to a new hash, which has the C<%^H>-specific magic
1903 added to it. A pointer to the new hash is returned.
1904
1905 =cut
1906 */
1907
1908 HV *
Perl_hv_copy_hints_hv(pTHX_ HV * const ohv)1909 Perl_hv_copy_hints_hv(pTHX_ HV *const ohv)
1910 {
1911 HV * const hv = newHV();
1912
1913 if (ohv) {
1914 STRLEN hv_max = HvMAX(ohv);
1915 STRLEN hv_keys = HvTOTALKEYS(ohv);
1916 HE *entry;
1917 const I32 riter = HvRITER_get(ohv);
1918 HE * const eiter = HvEITER_get(ohv);
1919
1920 ENTER;
1921 SAVEFREESV(hv);
1922
1923 HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
1924
1925 hv_iterinit(ohv);
1926 while ((entry = hv_iternext_flags(ohv, 0))) {
1927 SV *const sv = newSVsv(hv_iterval(ohv,entry));
1928 SV *heksv = HeSVKEY(entry);
1929 if (!heksv && sv) heksv = newSVhek(HeKEY_hek(entry));
1930 if (sv) sv_magic(sv, NULL, PERL_MAGIC_hintselem,
1931 (char *)heksv, HEf_SVKEY);
1932 if (heksv == HeSVKEY(entry))
1933 (void)hv_store_ent(hv, heksv, sv, 0);
1934 else {
1935 (void)hv_common(hv, heksv, HeKEY(entry), HeKLEN(entry),
1936 HeKFLAGS(entry), HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, sv, HeHASH(entry));
1937 SvREFCNT_dec_NN(heksv);
1938 }
1939 }
1940 HvRITER_set(ohv, riter);
1941 HvEITER_set(ohv, eiter);
1942
1943 SvREFCNT_inc_simple_void_NN(hv);
1944 LEAVE;
1945 }
1946 hv_magic(hv, NULL, PERL_MAGIC_hints);
1947 return hv;
1948 }
1949 #undef HV_SET_MAX_ADJUSTED_FOR_KEYS
1950
1951 /* like hv_free_ent, but returns the SV rather than freeing it */
1952 STATIC SV*
S_hv_free_ent_ret(pTHX_ HE * entry)1953 S_hv_free_ent_ret(pTHX_ HE *entry)
1954 {
1955 PERL_ARGS_ASSERT_HV_FREE_ENT_RET;
1956
1957 SV *val = HeVAL(entry);
1958 if (HeKLEN(entry) == HEf_SVKEY) {
1959 SvREFCNT_dec(HeKEY_sv(entry));
1960 Safefree(HeKEY_hek(entry));
1961 }
1962 else if ((HeKFLAGS(entry) & HVhek_NOTSHARED) == 0) {
1963 unshare_hek(HeKEY_hek(entry));
1964 }
1965 else {
1966 Safefree(HeKEY_hek(entry));
1967 }
1968 del_HE(entry);
1969 return val;
1970 }
1971
1972
1973 void
Perl_hv_free_ent(pTHX_ HV * notused,HE * entry)1974 Perl_hv_free_ent(pTHX_ HV *notused, HE *entry)
1975 {
1976 PERL_UNUSED_ARG(notused);
1977
1978 if (!entry)
1979 return;
1980
1981 SV *val = hv_free_ent_ret(entry);
1982 SvREFCNT_dec(val);
1983 }
1984
1985
1986 void
Perl_hv_delayfree_ent(pTHX_ HV * notused,HE * entry)1987 Perl_hv_delayfree_ent(pTHX_ HV *notused, HE *entry)
1988 {
1989 PERL_UNUSED_ARG(notused);
1990
1991 if (!entry)
1992 return;
1993 /* SvREFCNT_inc to counter the SvREFCNT_dec in hv_free_ent */
1994 sv_2mortal(SvREFCNT_inc(HeVAL(entry))); /* free between statements */
1995 if (HeKLEN(entry) == HEf_SVKEY) {
1996 sv_2mortal(SvREFCNT_inc(HeKEY_sv(entry)));
1997 }
1998 hv_free_ent(NULL, entry);
1999 }
2000
2001 /*
2002 =for apidoc hv_clear
2003
2004 Frees all the elements of a hash, leaving it empty.
2005 The XS equivalent of C<%hash = ()>. See also L</hv_undef>.
2006
2007 See L</av_clear> for a note about the hash possibly being invalid on
2008 return.
2009
2010 =cut
2011 */
2012
2013 void
Perl_hv_clear(pTHX_ HV * hv)2014 Perl_hv_clear(pTHX_ HV *hv)
2015 {
2016 SSize_t orig_ix;
2017
2018 if (!hv)
2019 return;
2020
2021 DEBUG_A(Perl_hv_assert(aTHX_ hv));
2022
2023 /* avoid hv being freed when calling destructors below */
2024 EXTEND_MORTAL(1);
2025 PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(hv);
2026 orig_ix = PL_tmps_ix;
2027 if (SvREADONLY(hv) && HvTOTALKEYS(hv)) {
2028 /* restricted hash: convert all keys to placeholders */
2029 STRLEN max = HvMAX(hv);
2030 STRLEN i;
2031 for (i = 0; i <= max; i++) {
2032 HE *entry = (HvARRAY(hv))[i];
2033 for (; entry; entry = HeNEXT(entry)) {
2034 /* not already placeholder */
2035 if (HeVAL(entry) != &PL_sv_placeholder) {
2036 if (HeVAL(entry)) {
2037 if (SvREADONLY(HeVAL(entry))) {
2038 SV* const keysv = hv_iterkeysv(entry);
2039 Perl_croak_nocontext(
2040 "Attempt to delete readonly key '%" SVf "' from a restricted hash",
2041 (void*)keysv);
2042 }
2043 SvREFCNT_dec_NN(HeVAL(entry));
2044 }
2045 HeVAL(entry) = &PL_sv_placeholder;
2046 HvPLACEHOLDERS(hv)++;
2047 }
2048 }
2049 }
2050 }
2051 else {
2052 hv_free_entries(hv);
2053 HvPLACEHOLDERS_set(hv, 0);
2054
2055 if (SvRMAGICAL(hv))
2056 mg_clear(MUTABLE_SV(hv));
2057
2058 HvHASKFLAGS_off(hv);
2059 }
2060 if (HvHasAUX(hv)) {
2061 if(HvENAME_get(hv))
2062 mro_isa_changed_in(hv);
2063 HvEITER_set(hv, NULL);
2064 }
2065 /* disarm hv's premature free guard */
2066 if (LIKELY(PL_tmps_ix == orig_ix))
2067 PL_tmps_ix--;
2068 else
2069 PL_tmps_stack[orig_ix] = &PL_sv_undef;
2070 SvREFCNT_dec_NN(hv);
2071 }
2072
2073 /*
2074 =for apidoc hv_clear_placeholders
2075
2076 Clears any placeholders from a hash. If a restricted hash has any of its keys
2077 marked as readonly and the key is subsequently deleted, the key is not actually
2078 deleted but is marked by assigning it a value of C<&PL_sv_placeholder>. This tags
2079 it so it will be ignored by future operations such as iterating over the hash,
2080 but will still allow the hash to have a value reassigned to the key at some
2081 future point. This function clears any such placeholder keys from the hash.
2082 See C<L<Hash::Util::lock_keys()|Hash::Util/lock_keys>> for an example of its
2083 use.
2084
2085 =cut
2086 */
2087
2088 void
Perl_hv_clear_placeholders(pTHX_ HV * hv)2089 Perl_hv_clear_placeholders(pTHX_ HV *hv)
2090 {
2091 const U32 items = (U32)HvPLACEHOLDERS_get(hv);
2092
2093 PERL_ARGS_ASSERT_HV_CLEAR_PLACEHOLDERS;
2094
2095 if (items)
2096 clear_placeholders(hv, items);
2097 }
2098
2099 static void
S_clear_placeholders(pTHX_ HV * hv,const U32 placeholders)2100 S_clear_placeholders(pTHX_ HV *hv, const U32 placeholders)
2101 {
2102 I32 i;
2103 U32 to_find = placeholders;
2104
2105 PERL_ARGS_ASSERT_CLEAR_PLACEHOLDERS;
2106
2107 assert(to_find);
2108
2109 i = HvMAX(hv);
2110 do {
2111 /* Loop down the linked list heads */
2112 HE **oentry = &(HvARRAY(hv))[i];
2113 HE *entry;
2114
2115 while ((entry = *oentry)) {
2116 if (HeVAL(entry) == &PL_sv_placeholder) {
2117 *oentry = HeNEXT(entry);
2118 if (entry == HvEITER_get(hv))
2119 HvLAZYDEL_on(hv);
2120 else {
2121 if (HvHasAUX(hv) && HvLAZYDEL(hv) &&
2122 entry == HeNEXT(HvAUX(hv)->xhv_eiter))
2123 HeNEXT(HvAUX(hv)->xhv_eiter) = HeNEXT(entry);
2124 hv_free_ent(NULL, entry);
2125 }
2126
2127 if (--to_find == 0) {
2128 /* Finished. */
2129 HvTOTALKEYS(hv) -= (IV)placeholders;
2130 if (HvTOTALKEYS(hv) == 0)
2131 HvHASKFLAGS_off(hv);
2132 HvPLACEHOLDERS_set(hv, 0);
2133 return;
2134 }
2135 } else {
2136 oentry = &HeNEXT(entry);
2137 }
2138 }
2139 } while (--i >= 0);
2140 /* You can't get here, hence assertion should always fail. */
2141 assert (to_find == 0);
2142 NOT_REACHED; /* NOTREACHED */
2143 }
2144
2145 STATIC void
S_hv_free_entries(pTHX_ HV * hv)2146 S_hv_free_entries(pTHX_ HV *hv)
2147 {
2148 STRLEN index = 0;
2149 SV *sv;
2150
2151 PERL_ARGS_ASSERT_HV_FREE_ENTRIES;
2152
2153 while ((sv = Perl_hfree_next_entry(aTHX_ hv, &index)) || HvTOTALKEYS(hv)) {
2154 SvREFCNT_dec(sv);
2155 }
2156 }
2157
2158
2159 /* hfree_next_entry()
2160 * For use only by S_hv_free_entries() and sv_clear().
2161 * Delete the next available HE from hv and return the associated SV.
2162 * Returns null on empty hash. Nevertheless null is not a reliable
2163 * indicator that the hash is empty, as the deleted entry may have a
2164 * null value.
2165 * indexp is a pointer to the current index into HvARRAY. The index should
2166 * initially be set to 0. hfree_next_entry() may update it. */
2167
2168 SV*
Perl_hfree_next_entry(pTHX_ HV * hv,STRLEN * indexp)2169 Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp)
2170 {
2171 struct xpvhv_aux *iter;
2172 HE *entry;
2173 HE ** array;
2174 #ifdef DEBUGGING
2175 STRLEN orig_index = *indexp;
2176 #endif
2177
2178 PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY;
2179
2180 if (HvHasAUX(hv) && ((iter = HvAUX(hv)))) {
2181 if ((entry = iter->xhv_eiter)) {
2182 /* the iterator may get resurrected after each
2183 * destructor call, so check each time */
2184 if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
2185 HvLAZYDEL_off(hv);
2186 hv_free_ent(NULL, entry);
2187 /* warning: at this point HvARRAY may have been
2188 * re-allocated, HvMAX changed etc */
2189 }
2190 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
2191 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
2192 #ifdef PERL_HASH_RANDOMIZE_KEYS
2193 iter->xhv_last_rand = iter->xhv_rand;
2194 #endif
2195 }
2196 }
2197
2198 if (!((XPVHV*)SvANY(hv))->xhv_keys)
2199 return NULL;
2200
2201 array = HvARRAY(hv);
2202 assert(array);
2203 while ( ! ((entry = array[*indexp])) ) {
2204 if ((*indexp)++ >= HvMAX(hv))
2205 *indexp = 0;
2206 assert(*indexp != orig_index);
2207 }
2208 array[*indexp] = HeNEXT(entry);
2209 ((XPVHV*) SvANY(hv))->xhv_keys--;
2210
2211 if ( PL_phase != PERL_PHASE_DESTRUCT && HvHasENAME(hv)
2212 && HeVAL(entry) && isGV(HeVAL(entry))
2213 && GvHV(HeVAL(entry)) && HvHasENAME(GvHV(HeVAL(entry)))
2214 ) {
2215 STRLEN klen;
2216 const char * const key = HePV(entry,klen);
2217 if ((klen > 1 && key[klen-1]==':' && key[klen-2]==':')
2218 || (klen == 1 && key[0] == ':')) {
2219 mro_package_moved(
2220 NULL, GvHV(HeVAL(entry)),
2221 (GV *)HeVAL(entry), 0
2222 );
2223 }
2224 }
2225 return hv_free_ent_ret(entry);
2226 }
2227
2228
2229 /*
2230 =for apidoc hv_undef
2231
2232 Undefines the hash. The XS equivalent of C<undef(%hash)>.
2233
2234 As well as freeing all the elements of the hash (like C<hv_clear()>), this
2235 also frees any auxiliary data and storage associated with the hash.
2236
2237 See L</av_clear> for a note about the hash possibly being invalid on
2238 return.
2239
2240 =cut
2241 */
2242
2243 void
Perl_hv_undef_flags(pTHX_ HV * hv,U32 flags)2244 Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
2245 {
2246 bool save;
2247 SSize_t orig_ix = PL_tmps_ix; /* silence compiler warning about uninitialized vars */
2248
2249 if (!hv)
2250 return;
2251 save = cBOOL(SvREFCNT(hv));
2252 DEBUG_A(Perl_hv_assert(aTHX_ hv));
2253
2254 /* The name must be deleted before the call to hv_free_entries so that
2255 CVs are anonymised properly. But the effective name must be pre-
2256 served until after that call (and only deleted afterwards if the
2257 call originated from sv_clear). For stashes with one name that is
2258 both the canonical name and the effective name, hv_name_set has to
2259 allocate an array for storing the effective name. We can skip that
2260 during global destruction, as it does not matter where the CVs point
2261 if they will be freed anyway. */
2262 /* note that the code following prior to hv_free_entries is duplicated
2263 * in sv_clear(), and changes here should be done there too */
2264 if (PL_phase != PERL_PHASE_DESTRUCT && HvHasNAME(hv)) {
2265 if (PL_stashcache) {
2266 HEK *hek = HvNAME_HEK(hv);
2267 DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for '%"
2268 HEKf "'\n", HEKfARG(hek)));
2269 (void)hv_deletehek(PL_stashcache, hek, G_DISCARD);
2270 }
2271 hv_name_set(hv, NULL, 0, 0);
2272 }
2273 if (save) {
2274 /* avoid hv being freed when calling destructors below */
2275 EXTEND_MORTAL(1);
2276 PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(hv);
2277 orig_ix = PL_tmps_ix;
2278 }
2279
2280 /* As well as any/all HE*s in HvARRAY(), this call also ensures that
2281 xhv_eiter is NULL, including handling the case of a tied hash partway
2282 through iteration where HvLAZYDEL() is true and xhv_eiter points to an
2283 HE* that needs to be explicitly freed. */
2284 hv_free_entries(hv);
2285
2286 /* HvHasAUX() is true for a hash if it has struct xpvhv_aux allocated. That
2287 structure has several other pieces of allocated memory - hence those must
2288 be freed before the structure itself can be freed. Some can be freed when
2289 a hash is "undefined" (this function), but some must persist until it is
2290 destroyed (which might be this function's immediate caller).
2291
2292 Hence the code in this block frees what it is logical to free (and NULLs
2293 out anything freed) so that the structure is left in a logically
2294 consistent state - pointers are NULL or point to valid memory, and
2295 non-pointer values are correct for an empty hash. The structure state
2296 must remain consistent, because this code can no longer clear SVf_OOK,
2297 meaning that this structure might be read again at any point in the
2298 future without further checks or reinitialisation. */
2299 if (HvHasAUX(hv)) {
2300 struct xpvhv_aux *aux = HvAUX(hv);
2301 struct mro_meta *meta;
2302 const char *name;
2303
2304 if (HvHasENAME(hv)) {
2305 if (PL_phase != PERL_PHASE_DESTRUCT)
2306 mro_isa_changed_in(hv);
2307 if (PL_stashcache) {
2308 DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for effective name '%"
2309 HEKf "'\n", HEKfARG(HvENAME_HEK_NN(hv))));
2310 (void)hv_deletehek(PL_stashcache, HvENAME_HEK_NN(hv), G_DISCARD);
2311 }
2312 }
2313
2314 /* If this call originated from sv_clear, then we must check for
2315 * effective names that need freeing, as well as the usual name. */
2316 name = HvNAME(hv);
2317 if (flags & HV_NAME_SETALL
2318 ? cBOOL(aux->xhv_name_u.xhvnameu_name)
2319 : cBOOL(name))
2320 {
2321 if (name && PL_stashcache) {
2322 DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for name '%"
2323 HEKf "'\n", HEKfARG(HvNAME_HEK_NN(hv))));
2324 (void)hv_deletehek(PL_stashcache, HvNAME_HEK_NN(hv), G_DISCARD);
2325 }
2326 hv_name_set(hv, NULL, 0, flags);
2327 }
2328 if((meta = aux->xhv_mro_meta)) {
2329 if (meta->mro_linear_all) {
2330 SvREFCNT_dec_NN(meta->mro_linear_all);
2331 /* mro_linear_current is just acting as a shortcut pointer,
2332 hence the else. */
2333 }
2334 else
2335 /* Only the current MRO is stored, so this owns the data.
2336 */
2337 SvREFCNT_dec(meta->mro_linear_current);
2338 SvREFCNT_dec(meta->mro_nextmethod);
2339 SvREFCNT_dec(meta->isa);
2340 SvREFCNT_dec(meta->super);
2341 Safefree(meta);
2342 aux->xhv_mro_meta = NULL;
2343 }
2344
2345 if(HvSTASH_IS_CLASS(hv)) {
2346 SvREFCNT_dec(aux->xhv_class_superclass);
2347 SvREFCNT_dec(aux->xhv_class_initfields_cv);
2348 SvREFCNT_dec(aux->xhv_class_adjust_blocks);
2349 if(aux->xhv_class_fields)
2350 PadnamelistREFCNT_dec(aux->xhv_class_fields);
2351 SvREFCNT_dec(aux->xhv_class_param_map);
2352 Safefree(aux->xhv_class_suspended_initfields_compcv);
2353 aux->xhv_class_suspended_initfields_compcv = NULL;
2354
2355 aux->xhv_aux_flags &= ~HvAUXf_IS_CLASS;
2356 }
2357 }
2358
2359 Safefree(HvARRAY(hv));
2360 HvMAX(hv) = PERL_HASH_DEFAULT_HvMAX; /* 7 (it's a normal hash) */
2361 HvARRAY(hv) = 0;
2362
2363 /* if we're freeing the HV, the SvMAGIC field has been reused for
2364 * other purposes, and so there can't be any placeholder magic */
2365 if (SvREFCNT(hv))
2366 HvPLACEHOLDERS_set(hv, 0);
2367
2368 if (SvRMAGICAL(hv))
2369 mg_clear(MUTABLE_SV(hv));
2370
2371 if (save) {
2372 /* disarm hv's premature free guard */
2373 if (LIKELY(PL_tmps_ix == orig_ix))
2374 PL_tmps_ix--;
2375 else
2376 PL_tmps_stack[orig_ix] = &PL_sv_undef;
2377 SvREFCNT_dec_NN(hv);
2378 }
2379 }
2380
2381 /*
2382 =for apidoc hv_fill
2383
2384 Returns the number of hash buckets that happen to be in use.
2385
2386 This function implements the L<C<HvFILL> macro|perlapi/HvFILL> which you should
2387 use instead.
2388
2389 As of perl 5.25 this function is used only for debugging
2390 purposes, and the number of used hash buckets is not
2391 in any way cached, thus this function can be costly
2392 to execute as it must iterate over all the buckets in the
2393 hash.
2394
2395 =cut
2396 */
2397
2398 STRLEN
Perl_hv_fill(pTHX_ HV * const hv)2399 Perl_hv_fill(pTHX_ HV *const hv)
2400 {
2401 STRLEN count = 0;
2402 HE **ents = HvARRAY(hv);
2403
2404 PERL_UNUSED_CONTEXT;
2405 PERL_ARGS_ASSERT_HV_FILL;
2406
2407 /* No keys implies no buckets used.
2408 One key can only possibly mean one bucket used. */
2409 if (HvTOTALKEYS(hv) < 2)
2410 return HvTOTALKEYS(hv);
2411
2412 if (ents) {
2413 /* I wonder why we count down here...
2414 * Is it some micro-optimisation?
2415 * I would have thought counting up was better.
2416 * - Yves
2417 */
2418 HE *const *const last = ents + HvMAX(hv);
2419 count = last + 1 - ents;
2420
2421 do {
2422 if (!*ents)
2423 --count;
2424 } while (++ents <= last);
2425 }
2426 return count;
2427 }
2428
2429 static struct xpvhv_aux*
S_hv_auxinit(pTHX_ HV * hv)2430 S_hv_auxinit(pTHX_ HV *hv) {
2431 struct xpvhv_aux *iter;
2432
2433 PERL_ARGS_ASSERT_HV_AUXINIT;
2434
2435 if (!HvHasAUX(hv)) {
2436 char *array = (char *) HvARRAY(hv);
2437 if (!array) {
2438 Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1), char);
2439 HvARRAY(hv) = (HE**)array;
2440 }
2441 iter = Perl_hv_auxalloc(aTHX_ hv);
2442 #ifdef PERL_HASH_RANDOMIZE_KEYS
2443 MAYBE_UPDATE_HASH_RAND_BITS();
2444 iter->xhv_rand = (U32)PL_hash_rand_bits;
2445 #endif
2446 } else {
2447 iter = HvAUX(hv);
2448 }
2449
2450 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
2451 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
2452 #ifdef PERL_HASH_RANDOMIZE_KEYS
2453 iter->xhv_last_rand = iter->xhv_rand;
2454 #endif
2455 iter->xhv_name_u.xhvnameu_name = 0;
2456 iter->xhv_name_count = 0;
2457 iter->xhv_backreferences = 0;
2458 iter->xhv_mro_meta = NULL;
2459 iter->xhv_aux_flags = 0;
2460 return iter;
2461 }
2462
2463 /*
2464 =for apidoc hv_iterinit
2465
2466 Prepares a starting point to traverse a hash table. Returns the number of
2467 keys in the hash, including placeholders (i.e. the same as C<HvTOTALKEYS(hv)>).
2468 The return value is currently only meaningful for hashes without tie magic.
2469
2470 NOTE: Before version 5.004_65, C<hv_iterinit> used to return the number of
2471 hash buckets that happen to be in use. If you still need that esoteric
2472 value, you can get it through the macro C<HvFILL(hv)>.
2473
2474
2475 =cut
2476 */
2477
2478 I32
Perl_hv_iterinit(pTHX_ HV * hv)2479 Perl_hv_iterinit(pTHX_ HV *hv)
2480 {
2481 PERL_ARGS_ASSERT_HV_ITERINIT;
2482
2483 if (HvHasAUX(hv)) {
2484 struct xpvhv_aux * iter = HvAUX(hv);
2485 HE * const entry = iter->xhv_eiter; /* HvEITER(hv) */
2486 if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
2487 HvLAZYDEL_off(hv);
2488 hv_free_ent(NULL, entry);
2489 }
2490 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
2491 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
2492 #ifdef PERL_HASH_RANDOMIZE_KEYS
2493 iter->xhv_last_rand = iter->xhv_rand;
2494 #endif
2495 } else {
2496 hv_auxinit(hv);
2497 }
2498
2499 /* note this includes placeholders! */
2500 return HvTOTALKEYS(hv);
2501 }
2502
2503 /*
2504 =for apidoc hv_riter_p
2505
2506 Implements C<HvRITER> which you should use instead.
2507
2508 =cut
2509 */
2510
2511 I32 *
Perl_hv_riter_p(pTHX_ HV * hv)2512 Perl_hv_riter_p(pTHX_ HV *hv) {
2513 struct xpvhv_aux *iter;
2514
2515 PERL_ARGS_ASSERT_HV_RITER_P;
2516
2517 iter = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2518 return &(iter->xhv_riter);
2519 }
2520
2521 /*
2522 =for apidoc hv_eiter_p
2523
2524 Implements C<HvEITER> which you should use instead.
2525
2526 =cut
2527 */
2528
2529 HE **
Perl_hv_eiter_p(pTHX_ HV * hv)2530 Perl_hv_eiter_p(pTHX_ HV *hv) {
2531 struct xpvhv_aux *iter;
2532
2533 PERL_ARGS_ASSERT_HV_EITER_P;
2534
2535 iter = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2536 return &(iter->xhv_eiter);
2537 }
2538
2539 /*
2540 =for apidoc hv_riter_set
2541
2542 Implements C<HvRITER_set> which you should use instead.
2543
2544 =cut
2545 */
2546
2547 void
Perl_hv_riter_set(pTHX_ HV * hv,I32 riter)2548 Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) {
2549 struct xpvhv_aux *iter;
2550
2551 PERL_ARGS_ASSERT_HV_RITER_SET;
2552
2553 if (HvHasAUX(hv)) {
2554 iter = HvAUX(hv);
2555 } else {
2556 if (riter == -1)
2557 return;
2558
2559 iter = hv_auxinit(hv);
2560 }
2561 iter->xhv_riter = riter;
2562 }
2563
2564 void
Perl_hv_rand_set(pTHX_ HV * hv,U32 new_xhv_rand)2565 Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) {
2566 struct xpvhv_aux *iter;
2567
2568 PERL_ARGS_ASSERT_HV_RAND_SET;
2569
2570 #ifdef PERL_HASH_RANDOMIZE_KEYS
2571 if (HvHasAUX(hv)) {
2572 iter = HvAUX(hv);
2573 } else {
2574 iter = hv_auxinit(hv);
2575 }
2576 iter->xhv_rand = new_xhv_rand;
2577 #else
2578 Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set().");
2579 #endif
2580 }
2581
2582 /*
2583 =for apidoc hv_eiter_set
2584
2585 Implements C<HvEITER_set> which you should use instead.
2586
2587 =cut
2588 */
2589
2590 void
Perl_hv_eiter_set(pTHX_ HV * hv,HE * eiter)2591 Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) {
2592 struct xpvhv_aux *iter;
2593
2594 PERL_ARGS_ASSERT_HV_EITER_SET;
2595
2596 if (HvHasAUX(hv)) {
2597 iter = HvAUX(hv);
2598 } else {
2599 /* 0 is the default so don't go malloc()ing a new structure just to
2600 hold 0. */
2601 if (!eiter)
2602 return;
2603
2604 iter = hv_auxinit(hv);
2605 }
2606 iter->xhv_eiter = eiter;
2607 }
2608
2609 /*
2610 =for apidoc hv_name_set
2611 =for apidoc_item ||hv_name_sets|HV *hv|"name"|U32 flags
2612
2613 These each set the name of stash C<hv> to the specified name.
2614
2615 They differ only in how the name is specified.
2616
2617 In C<hv_name_sets>, the name is a literal C string, enclosed in double quotes.
2618
2619 In C<hv_name_set>, C<name> points to the first byte of the name, and an
2620 additional parameter, C<len>, specifies its length in bytes. Hence, the name
2621 may contain embedded-NUL characters.
2622
2623 If C<SVf_UTF8> is set in C<flags>, the name is treated as being in UTF-8;
2624 otherwise not.
2625
2626 If C<HV_NAME_SETALL> is set in C<flags>, both the name and the effective name
2627 are set.
2628
2629 =for apidoc Amnh||HV_NAME_SETALL
2630
2631 =cut
2632 */
2633
2634 void
Perl_hv_name_set(pTHX_ HV * hv,const char * name,U32 len,U32 flags)2635 Perl_hv_name_set(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
2636 {
2637 struct xpvhv_aux *iter;
2638 U32 hash;
2639 HEK **spot;
2640
2641 PERL_ARGS_ASSERT_HV_NAME_SET;
2642
2643 if (len > I32_MAX)
2644 Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len);
2645
2646 if (HvHasAUX(hv)) {
2647 iter = HvAUX(hv);
2648 if (iter->xhv_name_u.xhvnameu_name) {
2649 if(iter->xhv_name_count) {
2650 if(flags & HV_NAME_SETALL) {
2651 HEK ** const this_name = HvAUX(hv)->xhv_name_u.xhvnameu_names;
2652 HEK **hekp = this_name + (
2653 iter->xhv_name_count < 0
2654 ? -iter->xhv_name_count
2655 : iter->xhv_name_count
2656 );
2657 while(hekp-- > this_name+1)
2658 unshare_hek_or_pvn(*hekp, 0, 0, 0);
2659 /* The first elem may be null. */
2660 if(*this_name) unshare_hek_or_pvn(*this_name, 0, 0, 0);
2661 Safefree(this_name);
2662 spot = &iter->xhv_name_u.xhvnameu_name;
2663 iter->xhv_name_count = 0;
2664 }
2665 else {
2666 if(iter->xhv_name_count > 0) {
2667 /* shift some things over */
2668 Renew(
2669 iter->xhv_name_u.xhvnameu_names, iter->xhv_name_count + 1, HEK *
2670 );
2671 spot = iter->xhv_name_u.xhvnameu_names;
2672 spot[iter->xhv_name_count] = spot[1];
2673 spot[1] = spot[0];
2674 iter->xhv_name_count = -(iter->xhv_name_count + 1);
2675 }
2676 else if(*(spot = iter->xhv_name_u.xhvnameu_names)) {
2677 unshare_hek_or_pvn(*spot, 0, 0, 0);
2678 }
2679 }
2680 }
2681 else if (flags & HV_NAME_SETALL) {
2682 unshare_hek_or_pvn(iter->xhv_name_u.xhvnameu_name, 0, 0, 0);
2683 spot = &iter->xhv_name_u.xhvnameu_name;
2684 }
2685 else {
2686 HEK * const existing_name = iter->xhv_name_u.xhvnameu_name;
2687 Newx(iter->xhv_name_u.xhvnameu_names, 2, HEK *);
2688 iter->xhv_name_count = -2;
2689 spot = iter->xhv_name_u.xhvnameu_names;
2690 spot[1] = existing_name;
2691 }
2692 }
2693 else { spot = &iter->xhv_name_u.xhvnameu_name; iter->xhv_name_count = 0; }
2694 } else {
2695 if (name == 0)
2696 return;
2697
2698 iter = hv_auxinit(hv);
2699 spot = &iter->xhv_name_u.xhvnameu_name;
2700 }
2701 PERL_HASH(hash, name, len);
2702 *spot = name ? share_hek(name, flags & SVf_UTF8 ? -(I32)len : (I32)len, hash) : NULL;
2703 }
2704
2705 /*
2706 This is basically sv_eq_flags() in sv.c, but we avoid the magic
2707 and bytes checking.
2708 */
2709
2710 STATIC I32
hek_eq_pvn_flags(pTHX_ const HEK * hek,const char * pv,const I32 pvlen,const U32 flags)2711 hek_eq_pvn_flags(pTHX_ const HEK *hek, const char* pv, const I32 pvlen, const U32 flags) {
2712 if ( (HEK_UTF8(hek) ? 1 : 0) != (flags & SVf_UTF8 ? 1 : 0) ) {
2713 if (flags & SVf_UTF8)
2714 return (bytes_cmp_utf8(
2715 (const U8*)HEK_KEY(hek), HEK_LEN(hek),
2716 (const U8*)pv, pvlen) == 0);
2717 else
2718 return (bytes_cmp_utf8(
2719 (const U8*)pv, pvlen,
2720 (const U8*)HEK_KEY(hek), HEK_LEN(hek)) == 0);
2721 }
2722 else
2723 return HEK_LEN(hek) == pvlen && ((HEK_KEY(hek) == pv)
2724 || memEQ(HEK_KEY(hek), pv, pvlen));
2725 }
2726
2727 /*
2728 =for apidoc hv_ename_add
2729
2730 Adds a name to a stash's internal list of effective names. See
2731 C<L</hv_ename_delete>>.
2732
2733 This is called when a stash is assigned to a new location in the symbol
2734 table.
2735
2736 =cut
2737 */
2738
2739 void
Perl_hv_ename_add(pTHX_ HV * hv,const char * name,U32 len,U32 flags)2740 Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
2741 {
2742 struct xpvhv_aux *aux = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2743 U32 hash;
2744
2745 PERL_ARGS_ASSERT_HV_ENAME_ADD;
2746
2747 if (len > I32_MAX)
2748 Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len);
2749
2750 PERL_HASH(hash, name, len);
2751
2752 if (aux->xhv_name_count) {
2753 I32 count = aux->xhv_name_count;
2754 HEK ** const xhv_name = aux->xhv_name_u.xhvnameu_names + (count<0);
2755 HEK **hekp = xhv_name + (count < 0 ? -count - 1 : count);
2756 while (hekp-- > xhv_name)
2757 {
2758 assert(*hekp);
2759 if (
2760 (HEK_UTF8(*hekp) || (flags & SVf_UTF8))
2761 ? hek_eq_pvn_flags(aTHX_ *hekp, name, (I32)len, flags)
2762 : (HEK_LEN(*hekp) == (I32)len && memEQ(HEK_KEY(*hekp), name, len))
2763 ) {
2764 if (hekp == xhv_name && count < 0)
2765 aux->xhv_name_count = -count;
2766 return;
2767 }
2768 }
2769 if (count < 0) aux->xhv_name_count--, count = -count;
2770 else aux->xhv_name_count++;
2771 Renew(aux->xhv_name_u.xhvnameu_names, count + 1, HEK *);
2772 (aux->xhv_name_u.xhvnameu_names)[count] = share_hek(name, (flags & SVf_UTF8 ? -(I32)len : (I32)len), hash);
2773 }
2774 else {
2775 HEK *existing_name = aux->xhv_name_u.xhvnameu_name;
2776 if (
2777 existing_name && (
2778 (HEK_UTF8(existing_name) || (flags & SVf_UTF8))
2779 ? hek_eq_pvn_flags(aTHX_ existing_name, name, (I32)len, flags)
2780 : (HEK_LEN(existing_name) == (I32)len && memEQ(HEK_KEY(existing_name), name, len))
2781 )
2782 ) return;
2783 Newx(aux->xhv_name_u.xhvnameu_names, 2, HEK *);
2784 aux->xhv_name_count = existing_name ? 2 : -2;
2785 *aux->xhv_name_u.xhvnameu_names = existing_name;
2786 (aux->xhv_name_u.xhvnameu_names)[1] = share_hek(name, (flags & SVf_UTF8 ? -(I32)len : (I32)len), hash);
2787 }
2788 }
2789
2790 /*
2791 =for apidoc hv_ename_delete
2792
2793 Removes a name from a stash's internal list of effective names. If this is
2794 the name returned by C<HvENAME>, then another name in the list will take
2795 its place (C<HvENAME> will use it).
2796
2797 This is called when a stash is deleted from the symbol table.
2798
2799 =cut
2800 */
2801
2802 void
Perl_hv_ename_delete(pTHX_ HV * hv,const char * name,U32 len,U32 flags)2803 Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
2804 {
2805 struct xpvhv_aux *aux;
2806
2807 PERL_ARGS_ASSERT_HV_ENAME_DELETE;
2808
2809 if (len > I32_MAX)
2810 Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len);
2811
2812 if (!HvHasAUX(hv)) return;
2813
2814 aux = HvAUX(hv);
2815 if (!aux->xhv_name_u.xhvnameu_name) return;
2816
2817 if (aux->xhv_name_count) {
2818 HEK ** const namep = aux->xhv_name_u.xhvnameu_names;
2819 I32 const count = aux->xhv_name_count;
2820 HEK **victim = namep + (count < 0 ? -count : count);
2821 while (victim-- > namep + 1)
2822 if (
2823 (HEK_UTF8(*victim) || (flags & SVf_UTF8))
2824 ? hek_eq_pvn_flags(aTHX_ *victim, name, (I32)len, flags)
2825 : (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len))
2826 ) {
2827 unshare_hek_or_pvn(*victim, 0, 0, 0);
2828 if (count < 0) ++aux->xhv_name_count;
2829 else --aux->xhv_name_count;
2830 if (
2831 (aux->xhv_name_count == 1 || aux->xhv_name_count == -1)
2832 && !*namep
2833 ) { /* if there are none left */
2834 Safefree(namep);
2835 aux->xhv_name_u.xhvnameu_names = NULL;
2836 aux->xhv_name_count = 0;
2837 }
2838 else {
2839 /* Move the last one back to fill the empty slot. It
2840 does not matter what order they are in. */
2841 *victim = *(namep + (count < 0 ? -count : count) - 1);
2842 }
2843 return;
2844 }
2845 if (
2846 count > 0 && ((HEK_UTF8(*namep) || (flags & SVf_UTF8))
2847 ? hek_eq_pvn_flags(aTHX_ *namep, name, (I32)len, flags)
2848 : (HEK_LEN(*namep) == (I32)len && memEQ(HEK_KEY(*namep), name, len))
2849 )
2850 ) {
2851 aux->xhv_name_count = -count;
2852 }
2853 }
2854 else if(
2855 (HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8))
2856 ? hek_eq_pvn_flags(aTHX_ aux->xhv_name_u.xhvnameu_name, name, (I32)len, flags)
2857 : (HEK_LEN(aux->xhv_name_u.xhvnameu_name) == (I32)len &&
2858 memEQ(HEK_KEY(aux->xhv_name_u.xhvnameu_name), name, len))
2859 ) {
2860 HEK * const namehek = aux->xhv_name_u.xhvnameu_name;
2861 Newx(aux->xhv_name_u.xhvnameu_names, 1, HEK *);
2862 *aux->xhv_name_u.xhvnameu_names = namehek;
2863 aux->xhv_name_count = -1;
2864 }
2865 }
2866
2867 AV **
Perl_hv_backreferences_p(pTHX_ HV * hv)2868 Perl_hv_backreferences_p(pTHX_ HV *hv) {
2869 PERL_ARGS_ASSERT_HV_BACKREFERENCES_P;
2870 /* See also Perl_sv_get_backrefs in sv.c where this logic is unrolled */
2871 {
2872 struct xpvhv_aux * const iter = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2873 return &(iter->xhv_backreferences);
2874 }
2875 }
2876
2877 void
Perl_hv_kill_backrefs(pTHX_ HV * hv)2878 Perl_hv_kill_backrefs(pTHX_ HV *hv) {
2879 AV *av;
2880
2881 PERL_ARGS_ASSERT_HV_KILL_BACKREFS;
2882
2883 if (!HvHasAUX(hv))
2884 return;
2885
2886 av = HvAUX(hv)->xhv_backreferences;
2887
2888 if (av) {
2889 HvAUX(hv)->xhv_backreferences = 0;
2890 Perl_sv_kill_backrefs(aTHX_ MUTABLE_SV(hv), av);
2891 if (SvTYPE(av) == SVt_PVAV)
2892 SvREFCNT_dec_NN(av);
2893 }
2894 }
2895
2896 /*
2897 hv_iternext is implemented as a macro in hv.h
2898
2899 =for apidoc hv_iternext
2900
2901 Returns entries from a hash iterator. See C<L</hv_iterinit>>.
2902
2903 You may call C<hv_delete> or C<hv_delete_ent> on the hash entry that the
2904 iterator currently points to, without losing your place or invalidating your
2905 iterator. Note that in this case the current entry is deleted from the hash
2906 with your iterator holding the last reference to it. Your iterator is flagged
2907 to free the entry on the next call to C<hv_iternext>, so you must not discard
2908 your iterator immediately else the entry will leak - call C<hv_iternext> to
2909 trigger the resource deallocation.
2910
2911 =for apidoc hv_iternext_flags
2912
2913 Returns entries from a hash iterator. See C<L</hv_iterinit>> and
2914 C<L</hv_iternext>>.
2915 The C<flags> value will normally be zero; if C<HV_ITERNEXT_WANTPLACEHOLDERS> is
2916 set the placeholders keys (for restricted hashes) will be returned in addition
2917 to normal keys. By default placeholders are automatically skipped over.
2918 Currently a placeholder is implemented with a value that is
2919 C<&PL_sv_placeholder>. Note that the implementation of placeholders and
2920 restricted hashes may change, and the implementation currently is
2921 insufficiently abstracted for any change to be tidy.
2922
2923 =for apidoc Amnh||HV_ITERNEXT_WANTPLACEHOLDERS
2924
2925 =cut
2926 */
2927
2928 HE *
Perl_hv_iternext_flags(pTHX_ HV * hv,I32 flags)2929 Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
2930 {
2931 HE *entry;
2932 HE *oldentry;
2933 MAGIC* mg;
2934 struct xpvhv_aux *iter;
2935
2936 PERL_ARGS_ASSERT_HV_ITERNEXT_FLAGS;
2937
2938 if (!HvHasAUX(hv)) {
2939 /* Too many things (well, pp_each at least) merrily assume that you can
2940 call hv_iternext without calling hv_iterinit, so we'll have to deal
2941 with it. */
2942 hv_iterinit(hv);
2943 }
2944 else if (!HvARRAY(hv)) {
2945 /* Since 5.002 calling hv_iternext() has ensured that HvARRAY() is
2946 non-NULL. There was explicit code for this added as part of commit
2947 4633a7c4bad06b47, without any explicit comment as to why, but from
2948 code inspection it seems to be a fix to ensure that the later line
2949 entry = ((HE**)xhv->xhv_array)[xhv->xhv_riter];
2950 was accessing a valid address, because that lookup in the loop was
2951 always reached even if the hash had no keys.
2952
2953 That explicit code was removed in 2005 as part of b79f7545f218479c:
2954 Store the xhv_aux structure after the main array.
2955 This reduces the size of HV bodies from 24 to 20 bytes on a 32 bit
2956 build. It has the side effect of defined %symbol_table:: now always
2957 being true. defined %hash is already deprecated.
2958
2959 with a comment and assertion added to note that after the call to
2960 hv_iterinit() HvARRAY() will now always be non-NULL.
2961
2962 In turn, that potential NULL-pointer access within the loop was made
2963 unreachable in 2009 by commit 9eb4ebd1619c0362
2964 In Perl_hv_iternext_flags(), clarify and generalise the empty hash bailout code.
2965
2966 which skipped the entire while loop if the hash had no keys.
2967 (If the hash has any keys, HvARRAY() cannot be NULL.)
2968 Hence the code in hv_iternext_flags() has long been able to handle
2969 HvARRAY() being NULL because no keys are allocated.
2970
2971 Now that we have decoupled the aux structure from HvARRAY(),
2972 HvARRAY() can now be NULL even when SVf_OOK is true (and the aux
2973 struct is allocated and correction initialised).
2974
2975 Is this actually a guarantee that we need to make? We should check
2976 whether anything is actually relying on this, or if we are simply
2977 making work for ourselves.
2978
2979 For now, keep the behaviour as-was - after calling hv_iternext_flags
2980 ensure that HvARRAY() is non-NULL. Many (other) things are changing -
2981 no need to add risk by changing this too. But in the future we should
2982 consider changing hv_iternext_flags() to avoid allocating HvARRAY()
2983 here, and potentially also we avoid allocating HvARRAY()
2984 automatically in hv_auxinit() */
2985
2986 char *array;
2987 Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1), char);
2988 HvARRAY(hv) = (HE**)array;
2989 }
2990
2991 iter = HvAUX(hv);
2992
2993 oldentry = entry = iter->xhv_eiter; /* HvEITER(hv) */
2994 if (SvMAGICAL(hv) && SvRMAGICAL(hv)) {
2995 if ( ( mg = mg_find((const SV *)hv, PERL_MAGIC_tied) ) ) {
2996 SV * const key = sv_newmortal();
2997 if (entry) {
2998 sv_setsv(key, HeSVKEY_force(entry));
2999 SvREFCNT_dec(HeSVKEY(entry)); /* get rid of previous key */
3000 HeSVKEY_set(entry, NULL);
3001 }
3002 else {
3003 char *k;
3004 HEK *hek;
3005
3006 /* one HE per MAGICAL hash */
3007 iter->xhv_eiter = entry = new_HE(); /* HvEITER(hv) = new_HE() */
3008 HvLAZYDEL_on(hv); /* make sure entry gets freed */
3009 Zero(entry, 1, HE);
3010 Newxz(k, HEK_BASESIZE + sizeof(const SV *), char);
3011 hek = (HEK*)k;
3012 HeKEY_hek(entry) = hek;
3013 HeKLEN(entry) = HEf_SVKEY;
3014 }
3015 magic_nextpack(MUTABLE_SV(hv),mg,key);
3016 if (SvOK(key)) {
3017 /* force key to stay around until next time */
3018 HeSVKEY_set(entry, SvREFCNT_inc_simple_NN(key));
3019 return entry; /* beware, hent_val is not set */
3020 }
3021 SvREFCNT_dec(HeVAL(entry));
3022 Safefree(HeKEY_hek(entry));
3023 del_HE(entry);
3024 iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
3025 HvLAZYDEL_off(hv);
3026 return NULL;
3027 }
3028 }
3029 #if defined(DYNAMIC_ENV_FETCH) && defined(VMS) /* set up %ENV for iteration */
3030 if (!entry && SvRMAGICAL((const SV *)hv)
3031 && mg_find((const SV *)hv, PERL_MAGIC_env)) {
3032 prime_env_iter();
3033 }
3034 #endif
3035
3036 /* hv_iterinit now ensures this. */
3037 assert (HvARRAY(hv));
3038
3039 /* At start of hash, entry is NULL. */
3040 if (entry)
3041 {
3042 entry = HeNEXT(entry);
3043 if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
3044 /*
3045 * Skip past any placeholders -- don't want to include them in
3046 * any iteration.
3047 */
3048 while (entry && HeVAL(entry) == &PL_sv_placeholder) {
3049 entry = HeNEXT(entry);
3050 }
3051 }
3052 }
3053
3054 #ifdef PERL_HASH_RANDOMIZE_KEYS
3055 if (iter->xhv_last_rand != iter->xhv_rand) {
3056 if (iter->xhv_riter != -1) {
3057 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
3058 "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior"
3059 pTHX__FORMAT
3060 pTHX__VALUE);
3061 }
3062 iter->xhv_last_rand = iter->xhv_rand;
3063 }
3064 #endif
3065
3066 /* Skip the entire loop if the hash is empty. */
3067 if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS)
3068 ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) {
3069 STRLEN max = HvMAX(hv);
3070 while (!entry) {
3071 /* OK. Come to the end of the current list. Grab the next one. */
3072
3073 iter->xhv_riter++; /* HvRITER(hv)++ */
3074 if (iter->xhv_riter > (I32)max /* HvRITER(hv) > HvMAX(hv) */) {
3075 /* There is no next one. End of the hash. */
3076 iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
3077 #ifdef PERL_HASH_RANDOMIZE_KEYS
3078 iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */
3079 #endif
3080 break;
3081 }
3082 entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & max ];
3083
3084 if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
3085 /* If we have an entry, but it's a placeholder, don't count it.
3086 Try the next. */
3087 while (entry && HeVAL(entry) == &PL_sv_placeholder)
3088 entry = HeNEXT(entry);
3089 }
3090 /* Will loop again if this linked list starts NULL
3091 (for HV_ITERNEXT_WANTPLACEHOLDERS)
3092 or if we run through it and find only placeholders. */
3093 }
3094 }
3095 else {
3096 iter->xhv_riter = -1;
3097 #ifdef PERL_HASH_RANDOMIZE_KEYS
3098 iter->xhv_last_rand = iter->xhv_rand;
3099 #endif
3100 }
3101
3102 if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */
3103 HvLAZYDEL_off(hv);
3104 hv_free_ent(NULL, oldentry);
3105 }
3106
3107 iter->xhv_eiter = entry; /* HvEITER(hv) = entry */
3108 return entry;
3109 }
3110
3111 /*
3112 =for apidoc hv_iterkey
3113
3114 Returns the key from the current position of the hash iterator. See
3115 C<L</hv_iterinit>>.
3116
3117 =cut
3118 */
3119
3120 char *
Perl_hv_iterkey(pTHX_ HE * entry,I32 * retlen)3121 Perl_hv_iterkey(pTHX_ HE *entry, I32 *retlen)
3122 {
3123 PERL_ARGS_ASSERT_HV_ITERKEY;
3124
3125 if (HeKLEN(entry) == HEf_SVKEY) {
3126 STRLEN len;
3127 char * const p = SvPV(HeKEY_sv(entry), len);
3128 *retlen = len;
3129 return p;
3130 }
3131 else {
3132 *retlen = HeKLEN(entry);
3133 return HeKEY(entry);
3134 }
3135 }
3136
3137 /* unlike hv_iterval(), this always returns a mortal copy of the key */
3138 /*
3139 =for apidoc hv_iterkeysv
3140
3141 Returns the key as an C<SV*> from the current position of the hash
3142 iterator. The return value will always be a mortal copy of the key. Also
3143 see C<L</hv_iterinit>>.
3144
3145 =cut
3146 */
3147
3148 SV *
Perl_hv_iterkeysv(pTHX_ HE * entry)3149 Perl_hv_iterkeysv(pTHX_ HE *entry)
3150 {
3151 PERL_ARGS_ASSERT_HV_ITERKEYSV;
3152
3153 return newSVhek_mortal(HeKEY_hek(entry));
3154 }
3155
3156 /*
3157 =for apidoc hv_iterval
3158
3159 Returns the value from the current position of the hash iterator. See
3160 C<L</hv_iterkey>>.
3161
3162 =cut
3163 */
3164
3165 SV *
Perl_hv_iterval(pTHX_ HV * hv,HE * entry)3166 Perl_hv_iterval(pTHX_ HV *hv, HE *entry)
3167 {
3168 PERL_ARGS_ASSERT_HV_ITERVAL;
3169
3170 if (SvRMAGICAL(hv)) {
3171 if (mg_find((const SV *)hv, PERL_MAGIC_tied)) {
3172 SV* const sv = sv_newmortal();
3173 if (HeKLEN(entry) == HEf_SVKEY)
3174 mg_copy(MUTABLE_SV(hv), sv, (char*)HeKEY_sv(entry), HEf_SVKEY);
3175 else
3176 mg_copy(MUTABLE_SV(hv), sv, HeKEY(entry), HeKLEN(entry));
3177 return sv;
3178 }
3179 }
3180 return HeVAL(entry);
3181 }
3182
3183 /*
3184 =for apidoc hv_iternextsv
3185
3186 Performs an C<hv_iternext>, C<hv_iterkey>, and C<hv_iterval> in one
3187 operation.
3188
3189 =cut
3190 */
3191
3192 SV *
Perl_hv_iternextsv(pTHX_ HV * hv,char ** key,I32 * retlen)3193 Perl_hv_iternextsv(pTHX_ HV *hv, char **key, I32 *retlen)
3194 {
3195 HE * const he = hv_iternext_flags(hv, 0);
3196
3197 PERL_ARGS_ASSERT_HV_ITERNEXTSV;
3198
3199 if (!he)
3200 return NULL;
3201 *key = hv_iterkey(he, retlen);
3202 return hv_iterval(hv, he);
3203 }
3204
3205 /*
3206
3207 Now a macro in hv.h
3208
3209 =for apidoc hv_magic
3210
3211 Adds magic to a hash. See C<L</sv_magic>>.
3212
3213 =for apidoc unsharepvn
3214
3215 If no one has access to shared string C<str> with length C<len>, free it.
3216
3217 C<len> and C<hash> must both be valid for C<str>.
3218
3219 =cut
3220 */
3221
3222 void
Perl_unsharepvn(pTHX_ const char * str,I32 len,U32 hash)3223 Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
3224 {
3225 unshare_hek_or_pvn (NULL, str, len, hash);
3226 }
3227
3228
3229 void
Perl_unshare_hek(pTHX_ HEK * hek)3230 Perl_unshare_hek(pTHX_ HEK *hek)
3231 {
3232 assert(hek);
3233 unshare_hek_or_pvn(hek, NULL, 0, 0);
3234 }
3235
3236 /* possibly free a shared string if no one has access to it
3237 hek if non-NULL takes priority over the other 3, else str, len and hash
3238 are used. If so, len and hash must both be valid for str.
3239 */
3240 STATIC void
S_unshare_hek_or_pvn(pTHX_ const HEK * hek,const char * str,I32 len,U32 hash)3241 S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash)
3242 {
3243 HE *entry;
3244 HE **oentry;
3245 bool is_utf8 = FALSE;
3246 int k_flags = 0;
3247 const char * const save = str;
3248 struct shared_he *he = NULL;
3249
3250 if (hek) {
3251 assert((HEK_FLAGS(hek) & HVhek_NOTSHARED) == 0);
3252 /* Find the shared he which is just before us in memory. */
3253 he = (struct shared_he *)(((char *)hek)
3254 - STRUCT_OFFSET(struct shared_he,
3255 shared_he_hek));
3256
3257 /* Assert that the caller passed us a genuine (or at least consistent)
3258 shared hek */
3259 assert (he->shared_he_he.hent_hek == hek);
3260
3261 if (he->shared_he_he.he_valu.hent_refcount - 1) {
3262 --he->shared_he_he.he_valu.hent_refcount;
3263 return;
3264 }
3265
3266 hash = HEK_HASH(hek);
3267 } else if (len < 0) {
3268 STRLEN tmplen = -len;
3269 is_utf8 = TRUE;
3270 /* See the note in hv_fetch(). --jhi */
3271 str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8);
3272 len = tmplen;
3273 if (is_utf8)
3274 k_flags = HVhek_UTF8;
3275 if (str != save)
3276 k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
3277 }
3278
3279 /* what follows was the moral equivalent of:
3280 if ((Svp = hv_fetch(PL_strtab, tmpsv, FALSE, hash))) {
3281 if (--*Svp == NULL)
3282 hv_delete(PL_strtab, str, len, G_DISCARD, hash);
3283 } */
3284
3285 /* assert(xhv_array != 0) */
3286 oentry = &(HvARRAY(PL_strtab))[hash & (I32) HvMAX(PL_strtab)];
3287 if (he) {
3288 const HE *const he_he = &(he->shared_he_he);
3289 for (entry = *oentry; entry; oentry = &HeNEXT(entry), entry = *oentry) {
3290 if (entry == he_he)
3291 break;
3292 }
3293 } else {
3294 const U8 flags_masked = k_flags & HVhek_STORAGE_MASK;
3295 for (entry = *oentry; entry; oentry = &HeNEXT(entry), entry = *oentry) {
3296 if (HeHASH(entry) != hash) /* strings can't be equal */
3297 continue;
3298 if (HeKLEN(entry) != len)
3299 continue;
3300 if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
3301 continue;
3302 if (HeKFLAGS(entry) != flags_masked)
3303 continue;
3304 break;
3305 }
3306 }
3307
3308 if (entry) {
3309 if (--entry->he_valu.hent_refcount == 0) {
3310 *oentry = HeNEXT(entry);
3311 Safefree(entry);
3312 HvTOTALKEYS(PL_strtab)--;
3313 }
3314 }
3315
3316 if (!entry)
3317 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
3318 "Attempt to free nonexistent shared string '%s'%s"
3319 pTHX__FORMAT,
3320 hek ? HEK_KEY(hek) : str,
3321 ((k_flags & HVhek_UTF8) ? " (utf8)" : "") pTHX__VALUE);
3322 if (k_flags & HVhek_FREEKEY)
3323 Safefree(str);
3324 }
3325
3326 /* get a (constant) string ptr from the global string table
3327 * string will get added if it is not already there.
3328 * len and hash must both be valid for str.
3329 */
3330 HEK *
Perl_share_hek(pTHX_ const char * str,SSize_t len,U32 hash)3331 Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash)
3332 {
3333 bool is_utf8 = FALSE;
3334 int flags = 0;
3335 const char * const save = str;
3336
3337 PERL_ARGS_ASSERT_SHARE_HEK;
3338
3339 if (len < 0) {
3340 STRLEN tmplen = -len;
3341 is_utf8 = TRUE;
3342 /* See the note in hv_fetch(). --jhi */
3343 str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8);
3344 len = tmplen;
3345 /* If we were able to downgrade here, then than means that we were passed
3346 in a key which only had chars 0-255, but was utf8 encoded. */
3347 if (is_utf8)
3348 flags = HVhek_UTF8;
3349 /* If we found we were able to downgrade the string to bytes, then
3350 we should flag that it needs upgrading on keys or each. Also flag
3351 that we need share_hek_flags to free the string. */
3352 if (str != save) {
3353 PERL_HASH(hash, str, len);
3354 flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
3355 }
3356 }
3357
3358 return share_hek_flags (str, len, hash, flags);
3359 }
3360
3361 STATIC HEK *
S_share_hek_flags(pTHX_ const char * str,STRLEN len,U32 hash,int flags)3362 S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags)
3363 {
3364 HE *entry;
3365 const U8 flags_masked = flags & HVhek_STORAGE_MASK;
3366 const U32 hindex = hash & (I32) HvMAX(PL_strtab);
3367
3368 PERL_ARGS_ASSERT_SHARE_HEK_FLAGS;
3369 assert(!(flags & HVhek_NOTSHARED));
3370
3371 if (UNLIKELY(len > (STRLEN) I32_MAX)) {
3372 Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes");
3373 }
3374
3375 /* what follows is the moral equivalent of:
3376
3377 if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
3378 hv_store(PL_strtab, str, len, NULL, hash);
3379
3380 Can't rehash the shared string table, so not sure if it's worth
3381 counting the number of entries in the linked list
3382 */
3383
3384 /* assert(xhv_array != 0) */
3385 entry = (HvARRAY(PL_strtab))[hindex];
3386 for (;entry; entry = HeNEXT(entry)) {
3387 if (HeHASH(entry) != hash) /* strings can't be equal */
3388 continue;
3389 if (HeKLEN(entry) != (SSize_t) len)
3390 continue;
3391 if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
3392 continue;
3393 if (HeKFLAGS(entry) != flags_masked)
3394 continue;
3395 break;
3396 }
3397
3398 if (!entry) {
3399 /* What used to be head of the list.
3400 If this is NULL, then we're the first entry for this slot, which
3401 means we need to increase fill. */
3402 struct shared_he *new_entry;
3403 HEK *hek;
3404 char *k;
3405 HE **const head = &HvARRAY(PL_strtab)[hindex];
3406 HE *const next = *head;
3407 XPVHV * const xhv = (XPVHV*)SvANY(PL_strtab);
3408
3409 /* We don't actually store a HE from the arena and a regular HEK.
3410 Instead we allocate one chunk of memory big enough for both,
3411 and put the HEK straight after the HE. This way we can find the
3412 HE directly from the HEK.
3413 */
3414
3415 Newx(k, STRUCT_OFFSET(struct shared_he,
3416 shared_he_hek.hek_key[0]) + len + 2, char);
3417 new_entry = (struct shared_he *)k;
3418 entry = &(new_entry->shared_he_he);
3419 hek = &(new_entry->shared_he_hek);
3420
3421 Copy(str, HEK_KEY(hek), len, char);
3422 HEK_KEY(hek)[len] = 0;
3423 HEK_LEN(hek) = len;
3424 HEK_HASH(hek) = hash;
3425 HEK_FLAGS(hek) = (unsigned char)flags_masked;
3426
3427 /* Still "point" to the HEK, so that other code need not know what
3428 we're up to. */
3429 HeKEY_hek(entry) = hek;
3430 entry->he_valu.hent_refcount = 0;
3431 HeNEXT(entry) = next;
3432 *head = entry;
3433
3434 xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
3435 if (!next) { /* initial entry? */
3436 } else if ( DO_HSPLIT(xhv) ) {
3437 const STRLEN oldsize = xhv->xhv_max + 1;
3438 hsplit(PL_strtab, oldsize, oldsize * 2);
3439 }
3440 }
3441
3442 ++entry->he_valu.hent_refcount;
3443
3444 if (flags & HVhek_FREEKEY)
3445 Safefree(str);
3446
3447 return HeKEY_hek(entry);
3448 }
3449
3450 SSize_t *
Perl_hv_placeholders_p(pTHX_ HV * hv)3451 Perl_hv_placeholders_p(pTHX_ HV *hv)
3452 {
3453 MAGIC *mg = mg_find((const SV *)hv, PERL_MAGIC_rhash);
3454
3455 PERL_ARGS_ASSERT_HV_PLACEHOLDERS_P;
3456
3457 if (!mg) {
3458 mg = sv_magicext(MUTABLE_SV(hv), 0, PERL_MAGIC_rhash, 0, 0, 0);
3459
3460 if (!mg) {
3461 Perl_die(aTHX_ "panic: hv_placeholders_p");
3462 }
3463 }
3464 return &(mg->mg_len);
3465 }
3466
3467 /*
3468 =for apidoc hv_placeholders_get
3469
3470 Implements C<HvPLACEHOLDERS_get>, which you should use instead.
3471
3472 =cut
3473 */
3474
3475 I32
Perl_hv_placeholders_get(pTHX_ const HV * hv)3476 Perl_hv_placeholders_get(pTHX_ const HV *hv)
3477 {
3478 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash);
3479
3480 PERL_ARGS_ASSERT_HV_PLACEHOLDERS_GET;
3481 PERL_UNUSED_CONTEXT;
3482
3483 return mg ? mg->mg_len : 0;
3484 }
3485
3486 /*
3487 =for apidoc hv_placeholders_set
3488
3489 Implements C<HvPLACEHOLDERS_set>, which you should use instead.
3490
3491 =cut
3492 */
3493
3494 void
Perl_hv_placeholders_set(pTHX_ HV * hv,I32 ph)3495 Perl_hv_placeholders_set(pTHX_ HV *hv, I32 ph)
3496 {
3497 MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash);
3498
3499 PERL_ARGS_ASSERT_HV_PLACEHOLDERS_SET;
3500
3501 if (mg) {
3502 mg->mg_len = ph;
3503 } else if (ph) {
3504 if (!sv_magicext(MUTABLE_SV(hv), 0, PERL_MAGIC_rhash, 0, 0, ph))
3505 Perl_die(aTHX_ "panic: hv_placeholders_set");
3506 }
3507 /* else we don't need to add magic to record 0 placeholders. */
3508 }
3509
3510 STATIC SV *
S_refcounted_he_value(pTHX_ const struct refcounted_he * he)3511 S_refcounted_he_value(pTHX_ const struct refcounted_he *he)
3512 {
3513 SV *value;
3514
3515 PERL_ARGS_ASSERT_REFCOUNTED_HE_VALUE;
3516
3517 switch(he->refcounted_he_data[0] & HVrhek_typemask) {
3518 case HVrhek_undef:
3519 value = newSV_type(SVt_NULL);
3520 break;
3521 case HVrhek_delete:
3522 value = &PL_sv_placeholder;
3523 break;
3524 case HVrhek_IV:
3525 value = newSViv(he->refcounted_he_val.refcounted_he_u_iv);
3526 break;
3527 case HVrhek_UV:
3528 value = newSVuv(he->refcounted_he_val.refcounted_he_u_uv);
3529 break;
3530 case HVrhek_PV:
3531 case HVrhek_PV_UTF8:
3532 /* Create a string SV that directly points to the bytes in our
3533 structure. */
3534 value = newSV_type(SVt_PV);
3535 SvPV_set(value, (char *) he->refcounted_he_data + 1);
3536 SvCUR_set(value, he->refcounted_he_val.refcounted_he_u_len);
3537 /* This stops anything trying to free it */
3538 SvLEN_set(value, 0);
3539 SvPOK_on(value);
3540 SvREADONLY_on(value);
3541 if ((he->refcounted_he_data[0] & HVrhek_typemask) == HVrhek_PV_UTF8)
3542 SvUTF8_on(value);
3543 break;
3544 default:
3545 Perl_croak(aTHX_ "panic: refcounted_he_value bad flags %" UVxf,
3546 (UV)he->refcounted_he_data[0]);
3547 }
3548 return value;
3549 }
3550
3551 /*
3552 =for apidoc refcounted_he_chain_2hv
3553
3554 Generates and returns a C<HV *> representing the content of a
3555 C<refcounted_he> chain.
3556 C<flags> is currently unused and must be zero.
3557
3558 =cut
3559 */
3560 HV *
Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he * chain,U32 flags)3561 Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags)
3562 {
3563 HV *hv;
3564 U32 placeholders, max;
3565
3566 if (flags)
3567 Perl_croak(aTHX_ "panic: refcounted_he_chain_2hv bad flags %" UVxf,
3568 (UV)flags);
3569
3570 /* We could chase the chain once to get an idea of the number of keys,
3571 and call ksplit. But for now we'll make a potentially inefficient
3572 hash with only 8 entries in its array. */
3573 hv = newHV();
3574 #ifdef NODEFAULT_SHAREKEYS
3575 /* We share keys in the COP, so it's much easier to keep sharing keys in
3576 the hash we build from it. */
3577 HvSHAREKEYS_on(hv);
3578 #endif
3579 max = HvMAX(hv);
3580 if (!HvARRAY(hv)) {
3581 char *array;
3582 Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(max + 1), char);
3583 HvARRAY(hv) = (HE**)array;
3584 }
3585
3586 placeholders = 0;
3587 while (chain) {
3588 #ifdef USE_ITHREADS
3589 U32 hash = chain->refcounted_he_hash;
3590 #else
3591 U32 hash = HEK_HASH(chain->refcounted_he_hek);
3592 #endif
3593 HE **oentry = &((HvARRAY(hv))[hash & max]);
3594 HE *entry = *oentry;
3595 SV *value;
3596
3597 for (; entry; entry = HeNEXT(entry)) {
3598 if (HeHASH(entry) == hash) {
3599 /* We might have a duplicate key here. If so, entry is older
3600 than the key we've already put in the hash, so if they are
3601 the same, skip adding entry. */
3602 #ifdef USE_ITHREADS
3603 const STRLEN klen = HeKLEN(entry);
3604 const char *const key = HeKEY(entry);
3605 if (klen == chain->refcounted_he_keylen
3606 && (cBOOL(HeKUTF8(entry))
3607 == cBOOL((chain->refcounted_he_data[0] & HVhek_UTF8)))
3608 && memEQ(key, REF_HE_KEY(chain), klen))
3609 goto next_please;
3610 #else
3611 if (HeKEY_hek(entry) == chain->refcounted_he_hek)
3612 goto next_please;
3613 if (HeKLEN(entry) == HEK_LEN(chain->refcounted_he_hek)
3614 && HeKUTF8(entry) == HEK_UTF8(chain->refcounted_he_hek)
3615 && memEQ(HeKEY(entry), HEK_KEY(chain->refcounted_he_hek),
3616 HeKLEN(entry)))
3617 goto next_please;
3618 #endif
3619 }
3620 }
3621 assert (!entry);
3622 entry = new_HE();
3623
3624 #ifdef USE_ITHREADS
3625 HeKEY_hek(entry)
3626 = share_hek_flags(REF_HE_KEY(chain),
3627 chain->refcounted_he_keylen,
3628 chain->refcounted_he_hash,
3629 (chain->refcounted_he_data[0]
3630 & (HVhek_UTF8|HVhek_WASUTF8)));
3631 #else
3632 HeKEY_hek(entry) = share_hek_hek(chain->refcounted_he_hek);
3633 #endif
3634 value = refcounted_he_value(chain);
3635 if (value == &PL_sv_placeholder)
3636 placeholders++;
3637 HeVAL(entry) = value;
3638
3639 /* Link it into the chain. */
3640 HeNEXT(entry) = *oentry;
3641 *oentry = entry;
3642
3643 HvTOTALKEYS(hv)++;
3644
3645 next_please:
3646 chain = chain->refcounted_he_next;
3647 }
3648
3649 if (placeholders) {
3650 clear_placeholders(hv, placeholders);
3651 }
3652
3653 /* We could check in the loop to see if we encounter any keys with key
3654 flags, but it's probably not worth it, as this per-hash flag is only
3655 really meant as an optimisation for things like Storable. */
3656 HvHASKFLAGS_on(hv);
3657 DEBUG_A(Perl_hv_assert(aTHX_ hv));
3658
3659 return hv;
3660 }
3661
3662 /*
3663 =for apidoc refcounted_he_fetch_pvn
3664
3665 Search along a C<refcounted_he> chain for an entry with the key specified
3666 by C<keypv> and C<keylen>. If C<flags> has the C<REFCOUNTED_HE_KEY_UTF8>
3667 bit set, the key octets are interpreted as UTF-8, otherwise they
3668 are interpreted as Latin-1. C<hash> is a precomputed hash of the key
3669 string, or zero if it has not been precomputed. Returns a mortal scalar
3670 representing the value associated with the key, or C<&PL_sv_placeholder>
3671 if there is no value associated with the key.
3672
3673 =cut
3674 */
3675
3676 SV *
Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he * chain,const char * keypv,STRLEN keylen,U32 hash,U32 flags)3677 Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
3678 const char *keypv, STRLEN keylen, U32 hash, U32 flags)
3679 {
3680 U8 utf8_flag;
3681 PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN;
3682
3683 if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS))
3684 Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf,
3685 (UV)flags);
3686 if (!chain)
3687 goto ret;
3688 if (flags & REFCOUNTED_HE_KEY_UTF8) {
3689 /* For searching purposes, canonicalise to Latin-1 where possible. */
3690 const char *keyend = keypv + keylen, *p;
3691 STRLEN nonascii_count = 0;
3692 for (p = keypv; p != keyend; p++) {
3693 if (! UTF8_IS_INVARIANT(*p)) {
3694 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
3695 goto canonicalised_key;
3696 }
3697 nonascii_count++;
3698 p++;
3699 }
3700 }
3701 if (nonascii_count) {
3702 char *q;
3703 const char *p = keypv, *keyend = keypv + keylen;
3704 keylen -= nonascii_count;
3705 Newx(q, keylen, char);
3706 SAVEFREEPV(q);
3707 keypv = q;
3708 for (; p != keyend; p++, q++) {
3709 U8 c = (U8)*p;
3710 if (UTF8_IS_INVARIANT(c)) {
3711 *q = (char) c;
3712 }
3713 else {
3714 p++;
3715 *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
3716 }
3717 }
3718 }
3719 flags &= ~REFCOUNTED_HE_KEY_UTF8;
3720 canonicalised_key: ;
3721 }
3722 utf8_flag = (flags & REFCOUNTED_HE_KEY_UTF8) ? HVhek_UTF8 : 0;
3723 if (!hash)
3724 PERL_HASH(hash, keypv, keylen);
3725
3726 for (; chain; chain = chain->refcounted_he_next) {
3727 if (
3728 #ifdef USE_ITHREADS
3729 hash == chain->refcounted_he_hash &&
3730 keylen == chain->refcounted_he_keylen &&
3731 memEQ(REF_HE_KEY(chain), keypv, keylen) &&
3732 utf8_flag == (chain->refcounted_he_data[0] & HVhek_UTF8)
3733 #else
3734 hash == HEK_HASH(chain->refcounted_he_hek) &&
3735 keylen == (STRLEN)HEK_LEN(chain->refcounted_he_hek) &&
3736 memEQ(HEK_KEY(chain->refcounted_he_hek), keypv, keylen) &&
3737 utf8_flag == (HEK_FLAGS(chain->refcounted_he_hek) & HVhek_UTF8)
3738 #endif
3739 ) {
3740 if (flags & REFCOUNTED_HE_EXISTS)
3741 return (chain->refcounted_he_data[0] & HVrhek_typemask)
3742 == HVrhek_delete
3743 ? NULL : &PL_sv_yes;
3744 return sv_2mortal(refcounted_he_value(chain));
3745 }
3746 }
3747 ret:
3748 return flags & REFCOUNTED_HE_EXISTS ? NULL : &PL_sv_placeholder;
3749 }
3750
3751 /*
3752 =for apidoc refcounted_he_fetch_pv
3753
3754 Like L</refcounted_he_fetch_pvn>, but takes a nul-terminated string
3755 instead of a string/length pair.
3756
3757 =cut
3758 */
3759
3760 SV *
Perl_refcounted_he_fetch_pv(pTHX_ const struct refcounted_he * chain,const char * key,U32 hash,U32 flags)3761 Perl_refcounted_he_fetch_pv(pTHX_ const struct refcounted_he *chain,
3762 const char *key, U32 hash, U32 flags)
3763 {
3764 PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PV;
3765 return refcounted_he_fetch_pvn(chain, key, strlen(key), hash, flags);
3766 }
3767
3768 /*
3769 =for apidoc refcounted_he_fetch_sv
3770
3771 Like L</refcounted_he_fetch_pvn>, but takes a Perl scalar instead of a
3772 string/length pair.
3773
3774 =cut
3775 */
3776
3777 SV *
Perl_refcounted_he_fetch_sv(pTHX_ const struct refcounted_he * chain,SV * key,U32 hash,U32 flags)3778 Perl_refcounted_he_fetch_sv(pTHX_ const struct refcounted_he *chain,
3779 SV *key, U32 hash, U32 flags)
3780 {
3781 const char *keypv;
3782 STRLEN keylen;
3783 PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_SV;
3784 if (flags & REFCOUNTED_HE_KEY_UTF8)
3785 Perl_croak(aTHX_ "panic: refcounted_he_fetch_sv bad flags %" UVxf,
3786 (UV)flags);
3787 keypv = SvPV_const(key, keylen);
3788 if (SvUTF8(key))
3789 flags |= REFCOUNTED_HE_KEY_UTF8;
3790 if (!hash && SvIsCOW_shared_hash(key))
3791 hash = SvSHARED_HASH(key);
3792 return refcounted_he_fetch_pvn(chain, keypv, keylen, hash, flags);
3793 }
3794
3795 /*
3796 =for apidoc refcounted_he_new_pvn
3797
3798 Creates a new C<refcounted_he>. This consists of a single key/value
3799 pair and a reference to an existing C<refcounted_he> chain (which may
3800 be empty), and thus forms a longer chain. When using the longer chain,
3801 the new key/value pair takes precedence over any entry for the same key
3802 further along the chain.
3803
3804 The new key is specified by C<keypv> and C<keylen>. If C<flags> has
3805 the C<REFCOUNTED_HE_KEY_UTF8> bit set, the key octets are interpreted
3806 as UTF-8, otherwise they are interpreted as Latin-1. C<hash> is
3807 a precomputed hash of the key string, or zero if it has not been
3808 precomputed.
3809
3810 C<value> is the scalar value to store for this key. C<value> is copied
3811 by this function, which thus does not take ownership of any reference
3812 to it, and later changes to the scalar will not be reflected in the
3813 value visible in the C<refcounted_he>. Complex types of scalar will not
3814 be stored with referential integrity, but will be coerced to strings.
3815 C<value> may be either null or C<&PL_sv_placeholder> to indicate that no
3816 value is to be associated with the key; this, as with any non-null value,
3817 takes precedence over the existence of a value for the key further along
3818 the chain.
3819
3820 C<parent> points to the rest of the C<refcounted_he> chain to be
3821 attached to the new C<refcounted_he>. This function takes ownership
3822 of one reference to C<parent>, and returns one reference to the new
3823 C<refcounted_he>.
3824
3825 =cut
3826 */
3827
3828 struct refcounted_he *
Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he * parent,const char * keypv,STRLEN keylen,U32 hash,SV * value,U32 flags)3829 Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
3830 const char *keypv, STRLEN keylen, U32 hash, SV *value, U32 flags)
3831 {
3832 STRLEN value_len = 0;
3833 const char *value_p = NULL;
3834 bool is_pv;
3835 char value_type;
3836 char hekflags;
3837 STRLEN key_offset = 1;
3838 struct refcounted_he *he;
3839 PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN;
3840
3841 if (!value || value == &PL_sv_placeholder) {
3842 value_type = HVrhek_delete;
3843 } else if (SvPOK(value)) {
3844 value_type = HVrhek_PV;
3845 } else if (SvIOK(value)) {
3846 value_type = SvUOK((const SV *)value) ? HVrhek_UV : HVrhek_IV;
3847 } else if (!SvOK(value)) {
3848 value_type = HVrhek_undef;
3849 } else {
3850 value_type = HVrhek_PV;
3851 }
3852 is_pv = value_type == HVrhek_PV;
3853 if (is_pv) {
3854 /* Do it this way so that the SvUTF8() test is after the SvPV, in case
3855 the value is overloaded, and doesn't yet have the UTF-8flag set. */
3856 value_p = SvPV_const(value, value_len);
3857 if (SvUTF8(value))
3858 value_type = HVrhek_PV_UTF8;
3859 key_offset = value_len + 2;
3860 }
3861 hekflags = value_type;
3862
3863 if (flags & REFCOUNTED_HE_KEY_UTF8) {
3864 /* Canonicalise to Latin-1 where possible. */
3865 const char *keyend = keypv + keylen, *p;
3866 STRLEN nonascii_count = 0;
3867 for (p = keypv; p != keyend; p++) {
3868 if (! UTF8_IS_INVARIANT(*p)) {
3869 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
3870 goto canonicalised_key;
3871 }
3872 nonascii_count++;
3873 p++;
3874 }
3875 }
3876 if (nonascii_count) {
3877 char *q;
3878 const char *p = keypv, *keyend = keypv + keylen;
3879 keylen -= nonascii_count;
3880 Newx(q, keylen, char);
3881 SAVEFREEPV(q);
3882 keypv = q;
3883 for (; p != keyend; p++, q++) {
3884 U8 c = (U8)*p;
3885 if (UTF8_IS_INVARIANT(c)) {
3886 *q = (char) c;
3887 }
3888 else {
3889 p++;
3890 *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
3891 }
3892 }
3893 }
3894 flags &= ~REFCOUNTED_HE_KEY_UTF8;
3895 canonicalised_key: ;
3896 }
3897 if (flags & REFCOUNTED_HE_KEY_UTF8)
3898 hekflags |= HVhek_UTF8;
3899 if (!hash)
3900 PERL_HASH(hash, keypv, keylen);
3901
3902 #ifdef USE_ITHREADS
3903 he = (struct refcounted_he*)
3904 PerlMemShared_malloc(sizeof(struct refcounted_he) - 1
3905 + keylen
3906 + key_offset);
3907 #else
3908 he = (struct refcounted_he*)
3909 PerlMemShared_malloc(sizeof(struct refcounted_he) - 1
3910 + key_offset);
3911 #endif
3912
3913 he->refcounted_he_next = parent;
3914
3915 if (is_pv) {
3916 Copy(value_p, he->refcounted_he_data + 1, value_len + 1, char);
3917 he->refcounted_he_val.refcounted_he_u_len = value_len;
3918 } else if (value_type == HVrhek_IV) {
3919 he->refcounted_he_val.refcounted_he_u_iv = SvIVX(value);
3920 } else if (value_type == HVrhek_UV) {
3921 he->refcounted_he_val.refcounted_he_u_uv = SvUVX(value);
3922 }
3923
3924 #ifdef USE_ITHREADS
3925 he->refcounted_he_hash = hash;
3926 he->refcounted_he_keylen = keylen;
3927 Copy(keypv, he->refcounted_he_data + key_offset, keylen, char);
3928 #else
3929 he->refcounted_he_hek = share_hek_flags(keypv, keylen, hash, hekflags);
3930 #endif
3931
3932 he->refcounted_he_data[0] = hekflags;
3933 he->refcounted_he_refcnt = 1;
3934
3935 return he;
3936 }
3937
3938 /*
3939 =for apidoc refcounted_he_new_pv
3940
3941 Like L</refcounted_he_new_pvn>, but takes a nul-terminated string instead
3942 of a string/length pair.
3943
3944 =cut
3945 */
3946
3947 struct refcounted_he *
Perl_refcounted_he_new_pv(pTHX_ struct refcounted_he * parent,const char * key,U32 hash,SV * value,U32 flags)3948 Perl_refcounted_he_new_pv(pTHX_ struct refcounted_he *parent,
3949 const char *key, U32 hash, SV *value, U32 flags)
3950 {
3951 PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PV;
3952 return refcounted_he_new_pvn(parent, key, strlen(key), hash, value, flags);
3953 }
3954
3955 /*
3956 =for apidoc refcounted_he_new_sv
3957
3958 Like L</refcounted_he_new_pvn>, but takes a Perl scalar instead of a
3959 string/length pair.
3960
3961 =cut
3962 */
3963
3964 struct refcounted_he *
Perl_refcounted_he_new_sv(pTHX_ struct refcounted_he * parent,SV * key,U32 hash,SV * value,U32 flags)3965 Perl_refcounted_he_new_sv(pTHX_ struct refcounted_he *parent,
3966 SV *key, U32 hash, SV *value, U32 flags)
3967 {
3968 const char *keypv;
3969 STRLEN keylen;
3970 PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_SV;
3971 if (flags & REFCOUNTED_HE_KEY_UTF8)
3972 Perl_croak(aTHX_ "panic: refcounted_he_new_sv bad flags %" UVxf,
3973 (UV)flags);
3974 keypv = SvPV_const(key, keylen);
3975 if (SvUTF8(key))
3976 flags |= REFCOUNTED_HE_KEY_UTF8;
3977 if (!hash && SvIsCOW_shared_hash(key))
3978 hash = SvSHARED_HASH(key);
3979 return refcounted_he_new_pvn(parent, keypv, keylen, hash, value, flags);
3980 }
3981
3982 /*
3983 =for apidoc refcounted_he_free
3984
3985 Decrements the reference count of a C<refcounted_he> by one. If the
3986 reference count reaches zero the structure's memory is freed, which
3987 (recursively) causes a reduction of its parent C<refcounted_he>'s
3988 reference count. It is safe to pass a null pointer to this function:
3989 no action occurs in this case.
3990
3991 =cut
3992 */
3993
3994 void
Perl_refcounted_he_free(pTHX_ struct refcounted_he * he)3995 Perl_refcounted_he_free(pTHX_ struct refcounted_he *he) {
3996 PERL_UNUSED_CONTEXT;
3997
3998 while (he) {
3999 struct refcounted_he *copy;
4000 U32 new_count;
4001
4002 HINTS_REFCNT_LOCK;
4003 new_count = --he->refcounted_he_refcnt;
4004 HINTS_REFCNT_UNLOCK;
4005
4006 if (new_count) {
4007 return;
4008 }
4009
4010 #ifndef USE_ITHREADS
4011 unshare_hek_or_pvn (he->refcounted_he_hek, 0, 0, 0);
4012 #endif
4013 copy = he;
4014 he = he->refcounted_he_next;
4015 PerlMemShared_free(copy);
4016 }
4017 }
4018
4019 /*
4020 =for apidoc refcounted_he_inc
4021
4022 Increment the reference count of a C<refcounted_he>. The pointer to the
4023 C<refcounted_he> is also returned. It is safe to pass a null pointer
4024 to this function: no action occurs and a null pointer is returned.
4025
4026 =cut
4027 */
4028
4029 struct refcounted_he *
Perl_refcounted_he_inc(pTHX_ struct refcounted_he * he)4030 Perl_refcounted_he_inc(pTHX_ struct refcounted_he *he)
4031 {
4032 PERL_UNUSED_CONTEXT;
4033 if (he) {
4034 HINTS_REFCNT_LOCK;
4035 he->refcounted_he_refcnt++;
4036 HINTS_REFCNT_UNLOCK;
4037 }
4038 return he;
4039 }
4040
4041 /*
4042 =for apidoc_section $COP
4043 =for apidoc cop_fetch_label
4044
4045 Returns the label attached to a cop, and stores its length in bytes into
4046 C<*len>.
4047 Upon return, C<*flags> will be set to either C<SVf_UTF8> or 0.
4048
4049 Alternatively, use the macro C<L</CopLABEL_len_flags>>;
4050 or if you don't need to know if the label is UTF-8 or not, the macro
4051 C<L</CopLABEL_len>>;
4052 or if you additionally don't need to know the length, C<L</CopLABEL>>.
4053
4054 =cut
4055 */
4056
4057 /* pp_entereval is aware that labels are stored with a key ':' at the top of
4058 the linked list. */
4059 const char *
Perl_cop_fetch_label(pTHX_ COP * const cop,STRLEN * len,U32 * flags)4060 Perl_cop_fetch_label(pTHX_ COP *const cop, STRLEN *len, U32 *flags) {
4061 struct refcounted_he *const chain = cop->cop_hints_hash;
4062
4063 PERL_ARGS_ASSERT_COP_FETCH_LABEL;
4064 PERL_UNUSED_CONTEXT;
4065
4066 if (!chain)
4067 return NULL;
4068 #ifdef USE_ITHREADS
4069 if (chain->refcounted_he_keylen != 1)
4070 return NULL;
4071 if (*REF_HE_KEY(chain) != ':')
4072 return NULL;
4073 #else
4074 if ((STRLEN)HEK_LEN(chain->refcounted_he_hek) != 1)
4075 return NULL;
4076 if (*HEK_KEY(chain->refcounted_he_hek) != ':')
4077 return NULL;
4078 #endif
4079 /* Stop anyone trying to really mess us up by adding their own value for
4080 ':' into %^H */
4081 if ((chain->refcounted_he_data[0] & HVrhek_typemask) != HVrhek_PV
4082 && (chain->refcounted_he_data[0] & HVrhek_typemask) != HVrhek_PV_UTF8)
4083 return NULL;
4084
4085 if (len)
4086 *len = chain->refcounted_he_val.refcounted_he_u_len;
4087 if (flags) {
4088 *flags = ((chain->refcounted_he_data[0] & HVrhek_typemask)
4089 == HVrhek_PV_UTF8) ? SVf_UTF8 : 0;
4090 }
4091 return chain->refcounted_he_data + 1;
4092 }
4093
4094 /*
4095 =for apidoc cop_store_label
4096
4097 Save a label into a C<cop_hints_hash>.
4098 You need to set flags to C<SVf_UTF8>
4099 for a UTF-8 label. Any other flag is ignored.
4100
4101 =cut
4102 */
4103
4104 void
Perl_cop_store_label(pTHX_ COP * const cop,const char * label,STRLEN len,U32 flags)4105 Perl_cop_store_label(pTHX_ COP *const cop, const char *label, STRLEN len,
4106 U32 flags)
4107 {
4108 SV *labelsv;
4109 PERL_ARGS_ASSERT_COP_STORE_LABEL;
4110
4111 if (flags & ~(SVf_UTF8))
4112 Perl_croak(aTHX_ "panic: cop_store_label illegal flag bits 0x%" UVxf,
4113 (UV)flags);
4114 labelsv = newSVpvn_flags(label, len, SVs_TEMP);
4115 if (flags & SVf_UTF8)
4116 SvUTF8_on(labelsv);
4117 cop->cop_hints_hash
4118 = refcounted_he_new_pvs(cop->cop_hints_hash, ":", labelsv, 0);
4119 }
4120
4121 /*
4122 =for apidoc_section $HV
4123 =for apidoc hv_assert
4124
4125 Check that a hash is in an internally consistent state.
4126
4127 =cut
4128 */
4129
4130 #ifdef DEBUGGING
4131
4132 void
Perl_hv_assert(pTHX_ HV * hv)4133 Perl_hv_assert(pTHX_ HV *hv)
4134 {
4135 HE* entry;
4136 int withflags = 0;
4137 int placeholders = 0;
4138 int real = 0;
4139 int bad = 0;
4140 const I32 riter = HvRITER_get(hv);
4141 HE *eiter = HvEITER_get(hv);
4142
4143 PERL_ARGS_ASSERT_HV_ASSERT;
4144
4145 (void)hv_iterinit(hv);
4146
4147 while ((entry = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS))) {
4148 /* sanity check the values */
4149 if (HeVAL(entry) == &PL_sv_placeholder)
4150 placeholders++;
4151 else
4152 real++;
4153 /* sanity check the keys */
4154 if (HeSVKEY(entry)) {
4155 NOOP; /* Don't know what to check on SV keys. */
4156 } else if (HeKUTF8(entry)) {
4157 withflags++;
4158 if (HeKWASUTF8(entry)) {
4159 PerlIO_printf(Perl_debug_log,
4160 "hash key has both WASUTF8 and UTF8: '%.*s'\n",
4161 (int) HeKLEN(entry), HeKEY(entry));
4162 bad = 1;
4163 }
4164 } else if (HeKWASUTF8(entry))
4165 withflags++;
4166 }
4167 if (!SvTIED_mg((const SV *)hv, PERL_MAGIC_tied)) {
4168 static const char bad_count[] = "Count %d %s(s), but hash reports %d\n";
4169 const int nhashkeys = HvUSEDKEYS(hv);
4170 const int nhashplaceholders = HvPLACEHOLDERS_get(hv);
4171
4172 if (nhashkeys != real) {
4173 PerlIO_printf(Perl_debug_log, bad_count, real, "keys", nhashkeys );
4174 bad = 1;
4175 }
4176 if (nhashplaceholders != placeholders) {
4177 PerlIO_printf(Perl_debug_log, bad_count, placeholders, "placeholder", nhashplaceholders );
4178 bad = 1;
4179 }
4180 }
4181 if (withflags && ! HvHASKFLAGS(hv)) {
4182 PerlIO_printf(Perl_debug_log,
4183 "Hash has HASKFLAGS off but I count %d key(s) with flags\n",
4184 withflags);
4185 bad = 1;
4186 }
4187 if (bad) {
4188 sv_dump(MUTABLE_SV(hv));
4189 }
4190 HvRITER_set(hv, riter); /* Restore hash iterator state */
4191 HvEITER_set(hv, eiter);
4192 }
4193
4194 #endif
4195
4196 /*
4197 * ex: set ts=8 sts=4 sw=4 et:
4198 */
4199