xref: /openbsd/gnu/usr.bin/perl/ext/Hash-Util/Util.xs (revision e0680481)
1898184e3Ssthen #define PERL_NO_GET_CONTEXT
2898184e3Ssthen 
343003dfeSmillert #include "EXTERN.h"
443003dfeSmillert #include "perl.h"
543003dfeSmillert #include "XSUB.h"
643003dfeSmillert 
743003dfeSmillert MODULE = Hash::Util		PACKAGE = Hash::Util
843003dfeSmillert 
9898184e3Ssthen void
109f11ffb7Safresh1 _clear_placeholders(hashref)
119f11ffb7Safresh1         HV *hashref
129f11ffb7Safresh1     PROTOTYPE: \%
139f11ffb7Safresh1     PREINIT:
149f11ffb7Safresh1         HV *hv;
159f11ffb7Safresh1     CODE:
169f11ffb7Safresh1         hv = MUTABLE_HV(hashref);
179f11ffb7Safresh1         hv_clear_placeholders(hv);
189f11ffb7Safresh1 
199f11ffb7Safresh1 void
2043003dfeSmillert all_keys(hash,keys,placeholder)
21898184e3Ssthen 	HV *hash
22898184e3Ssthen 	AV *keys
23898184e3Ssthen 	AV *placeholder
2443003dfeSmillert     PROTOTYPE: \%\@\@
2543003dfeSmillert     PREINIT:
2643003dfeSmillert         SV *key;
2743003dfeSmillert         HE *he;
28898184e3Ssthen     PPCODE:
29898184e3Ssthen         av_clear(keys);
30898184e3Ssthen         av_clear(placeholder);
3143003dfeSmillert 
32898184e3Ssthen         (void)hv_iterinit(hash);
33898184e3Ssthen 	while((he = hv_iternext_flags(hash, HV_ITERNEXT_WANTPLACEHOLDERS))!= NULL) {
3443003dfeSmillert 	    key=hv_iterkeysv(he);
35898184e3Ssthen 	    av_push(HeVAL(he) == &PL_sv_placeholder ? placeholder : keys,
36898184e3Ssthen 		    SvREFCNT_inc(key));
3743003dfeSmillert         }
38898184e3Ssthen 	XSRETURN(1);
3943003dfeSmillert 
4043003dfeSmillert void
4143003dfeSmillert hidden_ref_keys(hash)
42898184e3Ssthen 	HV *hash
43898184e3Ssthen     ALIAS:
44898184e3Ssthen 	Hash::Util::legal_ref_keys = 1
4543003dfeSmillert     PREINIT:
4643003dfeSmillert         SV *key;
4743003dfeSmillert         HE *he;
4843003dfeSmillert     PPCODE:
49898184e3Ssthen         (void)hv_iterinit(hash);
50898184e3Ssthen 	while((he = hv_iternext_flags(hash, HV_ITERNEXT_WANTPLACEHOLDERS))!= NULL) {
5143003dfeSmillert 	    key=hv_iterkeysv(he);
52898184e3Ssthen             if (ix || HeVAL(he) == &PL_sv_placeholder) {
5343003dfeSmillert                 XPUSHs( key );
5443003dfeSmillert             }
5543003dfeSmillert         }
5643003dfeSmillert 
5743003dfeSmillert void
58898184e3Ssthen hv_store(hash, key, val)
59898184e3Ssthen 	HV *hash
6043003dfeSmillert 	SV* key
6143003dfeSmillert 	SV* val
6243003dfeSmillert     PROTOTYPE: \%$$
6343003dfeSmillert     CODE:
6443003dfeSmillert     {
6543003dfeSmillert         SvREFCNT_inc(val);
66898184e3Ssthen 	if (!hv_store_ent(hash, key, val, 0)) {
6743003dfeSmillert 	    SvREFCNT_dec(val);
6843003dfeSmillert 	    XSRETURN_NO;
6943003dfeSmillert 	} else {
7043003dfeSmillert 	    XSRETURN_YES;
7143003dfeSmillert 	}
7243003dfeSmillert     }
7391f110e0Safresh1 
7491f110e0Safresh1 void
7591f110e0Safresh1 hash_seed()
7691f110e0Safresh1     PROTOTYPE:
7791f110e0Safresh1     PPCODE:
7891f110e0Safresh1     mXPUSHs(newSVpvn((char *)PERL_HASH_SEED,PERL_HASH_SEED_BYTES));
7991f110e0Safresh1     XSRETURN(1);
8091f110e0Safresh1 
8191f110e0Safresh1 
8291f110e0Safresh1 void
hash_value(string,...)83b8851fccSafresh1 hash_value(string,...)
8491f110e0Safresh1         SV* string
85b8851fccSafresh1     PROTOTYPE: $;$
8691f110e0Safresh1     PPCODE:
87b8851fccSafresh1 {
8891f110e0Safresh1     UV uv;
89b8851fccSafresh1     STRLEN len;
90b8851fccSafresh1     char *pv= SvPV(string,len);
91b8851fccSafresh1     if (items<2) {
9291f110e0Safresh1         PERL_HASH(uv, pv, len);
93b8851fccSafresh1     } else {
94b8851fccSafresh1         STRLEN seedlen;
95b8851fccSafresh1         U8 *seedbuf= (U8 *)SvPV(ST(1),seedlen);
96b8851fccSafresh1         if ( seedlen < PERL_HASH_SEED_BYTES ) {
97b8851fccSafresh1             sv_dump(ST(1));
98eac174f2Safresh1             Perl_croak(aTHX_ "seed len must be at least %" UVuf " long only got %"
99eac174f2Safresh1                              UVuf " bytes", (UV)PERL_HASH_SEED_BYTES, (UV)seedlen);
100b8851fccSafresh1         }
101b8851fccSafresh1 
102b8851fccSafresh1         PERL_HASH_WITH_SEED(seedbuf, uv, pv, len);
103b8851fccSafresh1     }
10491f110e0Safresh1     XSRETURN_UV(uv);
105b8851fccSafresh1 }
10691f110e0Safresh1 
10791f110e0Safresh1 void
hash_traversal_mask(rhv,...)10891f110e0Safresh1 hash_traversal_mask(rhv, ...)
10991f110e0Safresh1         SV* rhv
11091f110e0Safresh1     PPCODE:
11191f110e0Safresh1 {
11291f110e0Safresh1 #ifdef PERL_HASH_RANDOMIZE_KEYS
11391f110e0Safresh1     if (SvROK(rhv) && SvTYPE(SvRV(rhv))==SVt_PVHV && !SvMAGICAL(SvRV(rhv))) {
11491f110e0Safresh1         HV *hv = (HV *)SvRV(rhv);
11591f110e0Safresh1         if (items>1) {
11691f110e0Safresh1             hv_rand_set(hv, SvUV(ST(1)));
11791f110e0Safresh1         }
118*e0680481Safresh1         if (HvHasAUX(hv)) {
11991f110e0Safresh1             XSRETURN_UV(HvRAND_get(hv));
12091f110e0Safresh1         } else {
12191f110e0Safresh1             XSRETURN_UNDEF;
12291f110e0Safresh1         }
12391f110e0Safresh1     }
12491f110e0Safresh1 #else
12591f110e0Safresh1     Perl_croak(aTHX_ "Perl has not been compiled with support for randomized hash key traversal");
12691f110e0Safresh1 #endif
12791f110e0Safresh1 }
12891f110e0Safresh1 
12991f110e0Safresh1 void
bucket_info(rhv)13091f110e0Safresh1 bucket_info(rhv)
13191f110e0Safresh1         SV* rhv
13291f110e0Safresh1     PPCODE:
13391f110e0Safresh1 {
13491f110e0Safresh1     /*
13591f110e0Safresh1 
13691f110e0Safresh1     Takes a non-magical hash ref as an argument and returns a list of
13791f110e0Safresh1     statistics about the hash. The number and keys and the size of the
13891f110e0Safresh1     array will always be reported as the first two values. If the array is
13991f110e0Safresh1     actually allocated (they are lazily allocated), then additionally
14091f110e0Safresh1     will return a list of counts of bucket lengths. In other words in
14191f110e0Safresh1 
14291f110e0Safresh1         ($keys, $buckets, $used, @length_count)= hash::bucket_info(\%hash);
14391f110e0Safresh1 
14491f110e0Safresh1     $length_count[0] is the number of empty buckets, and $length_count[1]
14591f110e0Safresh1     is the number of buckets with only one key in it, $buckets - $length_count[0]
14691f110e0Safresh1     gives the number of used buckets, and @length_count-1 is the maximum
14791f110e0Safresh1     bucket depth.
14891f110e0Safresh1 
14991f110e0Safresh1     If the argument is not a hash ref, or if it is magical, then returns
15091f110e0Safresh1     nothing (the empty list).
15191f110e0Safresh1 
15291f110e0Safresh1     */
153b8851fccSafresh1     const HV * hv = NULL;
15491f110e0Safresh1     if (SvROK(rhv) && SvTYPE(SvRV(rhv))==SVt_PVHV && !SvMAGICAL(SvRV(rhv))) {
155b8851fccSafresh1         hv = (const HV *) SvRV(rhv);
156b8851fccSafresh1     } else if (!SvOK(rhv)) {
157b8851fccSafresh1         hv = PL_strtab;
158b8851fccSafresh1     }
159b8851fccSafresh1     if (hv) {
16091f110e0Safresh1         U32 max_bucket_index= HvMAX(hv);
16191f110e0Safresh1         U32 total_keys= HvUSEDKEYS(hv);
16291f110e0Safresh1         HE **bucket_array= HvARRAY(hv);
16391f110e0Safresh1         mXPUSHi(total_keys);
16491f110e0Safresh1         mXPUSHi(max_bucket_index+1);
16591f110e0Safresh1         mXPUSHi(0); /* for the number of used buckets */
16691f110e0Safresh1 #define BUCKET_INFO_ITEMS_ON_STACK 3
16791f110e0Safresh1         if (!bucket_array) {
16891f110e0Safresh1             XSRETURN(BUCKET_INFO_ITEMS_ON_STACK);
16991f110e0Safresh1         } else {
17091f110e0Safresh1             /* we use chain_length to index the stack - we eliminate an add
17191f110e0Safresh1              * by initializing things with the number of items already on the stack.
17291f110e0Safresh1              * If we have 2 items then ST(2+0) (the third stack item) will be the counter
17391f110e0Safresh1              * for empty chains, ST(2+1) will be for chains with one element,  etc.
17491f110e0Safresh1              */
17591f110e0Safresh1             I32 max_chain_length= BUCKET_INFO_ITEMS_ON_STACK - 1; /* so we do not have to do an extra push for the 0 index */
17691f110e0Safresh1             HE *he;
17791f110e0Safresh1             U32 bucket_index;
17891f110e0Safresh1             for ( bucket_index= 0; bucket_index <= max_bucket_index; bucket_index++ ) {
17991f110e0Safresh1                 I32 chain_length= BUCKET_INFO_ITEMS_ON_STACK;
18091f110e0Safresh1                 for (he= bucket_array[bucket_index]; he; he= HeNEXT(he) ) {
18191f110e0Safresh1                     chain_length++;
18291f110e0Safresh1                 }
18391f110e0Safresh1                 while ( max_chain_length < chain_length ) {
18491f110e0Safresh1                     mXPUSHi(0);
18591f110e0Safresh1                     max_chain_length++;
18691f110e0Safresh1                 }
18791f110e0Safresh1                 SvIVX( ST( chain_length ) )++;
18891f110e0Safresh1             }
18991f110e0Safresh1             /* now set the number of used buckets */
19091f110e0Safresh1             SvIVX( ST( BUCKET_INFO_ITEMS_ON_STACK - 1 ) ) = max_bucket_index - SvIVX( ST( BUCKET_INFO_ITEMS_ON_STACK ) ) + 1;
19191f110e0Safresh1             XSRETURN( max_chain_length + 1 ); /* max_chain_length is the index of the last item on the stack, so we add 1 */
19291f110e0Safresh1         }
19391f110e0Safresh1 #undef BUCKET_INFO_ITEMS_ON_STACK
19491f110e0Safresh1     }
19591f110e0Safresh1     XSRETURN(0);
19691f110e0Safresh1 }
19791f110e0Safresh1 
19891f110e0Safresh1 void
bucket_array(rhv)19991f110e0Safresh1 bucket_array(rhv)
20091f110e0Safresh1         SV* rhv
20191f110e0Safresh1     PPCODE:
20291f110e0Safresh1 {
20391f110e0Safresh1     /* Returns an array of arrays representing key/bucket mappings.
20491f110e0Safresh1      * Each element of the array contains either an integer or a reference
20591f110e0Safresh1      * to an array of keys. A plain integer represents K empty buckets. An
20691f110e0Safresh1      * array ref represents a single bucket, with each element being a key in
20791f110e0Safresh1      * the hash. (Note this treats a placeholder as a normal key.)
20891f110e0Safresh1      *
20991f110e0Safresh1      * This allows one to "see" the keyorder. Note the "insert first" nature
21091f110e0Safresh1      * of the hash store, combined with regular remappings means that relative
21191f110e0Safresh1      * order of keys changes each remap.
21291f110e0Safresh1      */
213b8851fccSafresh1     const HV * hv = NULL;
21491f110e0Safresh1     if (SvROK(rhv) && SvTYPE(SvRV(rhv))==SVt_PVHV && !SvMAGICAL(SvRV(rhv))) {
215b8851fccSafresh1         hv = (const HV *) SvRV(rhv);
216b8851fccSafresh1     } else if (!SvOK(rhv)) {
217b8851fccSafresh1         hv = PL_strtab;
218b8851fccSafresh1     }
219b8851fccSafresh1     if (hv) {
22091f110e0Safresh1         HE **he_ptr= HvARRAY(hv);
22191f110e0Safresh1         if (!he_ptr) {
22291f110e0Safresh1             XSRETURN(0);
22391f110e0Safresh1         } else {
22491f110e0Safresh1             U32 i, max;
22591f110e0Safresh1             AV *info_av;
22691f110e0Safresh1             HE *he;
22791f110e0Safresh1             I32 empty_count=0;
22891f110e0Safresh1             if (SvMAGICAL(hv)) {
22991f110e0Safresh1                 Perl_croak(aTHX_ "hash::bucket_array only works on 'normal' hashes");
23091f110e0Safresh1             }
23191f110e0Safresh1             info_av= newAV();
23291f110e0Safresh1             max= HvMAX(hv);
23391f110e0Safresh1             mXPUSHs(newRV_noinc((SV*)info_av));
23491f110e0Safresh1             for ( i= 0; i <= max; i++ ) {
23591f110e0Safresh1                 AV *key_av= NULL;
23691f110e0Safresh1                 for (he= he_ptr[i]; he; he= HeNEXT(he) ) {
23791f110e0Safresh1                     SV *key_sv;
23891f110e0Safresh1                     char *str;
23991f110e0Safresh1                     STRLEN len;
24091f110e0Safresh1                     char mode;
24191f110e0Safresh1                     if (!key_av) {
24291f110e0Safresh1                         key_av= newAV();
24391f110e0Safresh1                         if (empty_count) {
24491f110e0Safresh1                             av_push(info_av, newSViv(empty_count));
24591f110e0Safresh1                             empty_count= 0;
24691f110e0Safresh1                         }
24791f110e0Safresh1                         av_push(info_av, (SV *)newRV_noinc((SV *)key_av));
24891f110e0Safresh1                     }
24991f110e0Safresh1                     if (HeKLEN(he) == HEf_SVKEY) {
25091f110e0Safresh1                         SV *sv= HeSVKEY(he);
25191f110e0Safresh1                         SvGETMAGIC(sv);
25291f110e0Safresh1                         str= SvPV(sv, len);
25391f110e0Safresh1                         mode= SvUTF8(sv) ? 1 : 0;
25491f110e0Safresh1                     } else {
25591f110e0Safresh1                         str= HeKEY(he);
25691f110e0Safresh1                         len= HeKLEN(he);
25791f110e0Safresh1                         mode= HeKUTF8(he) ? 1 : 0;
25891f110e0Safresh1                     }
25991f110e0Safresh1                     key_sv= newSVpvn(str,len);
26091f110e0Safresh1                     av_push(key_av,key_sv);
26191f110e0Safresh1                     if (mode) {
26291f110e0Safresh1                         SvUTF8_on(key_sv);
26391f110e0Safresh1                     }
26491f110e0Safresh1                 }
26591f110e0Safresh1                 if (!key_av)
26691f110e0Safresh1                     empty_count++;
26791f110e0Safresh1             }
26891f110e0Safresh1             if (empty_count) {
26991f110e0Safresh1                 av_push(info_av, newSViv(empty_count));
27091f110e0Safresh1                 empty_count++;
27191f110e0Safresh1             }
27291f110e0Safresh1         }
27391f110e0Safresh1         XSRETURN(1);
27491f110e0Safresh1     }
27591f110e0Safresh1     XSRETURN(0);
27691f110e0Safresh1 }
2779f11ffb7Safresh1 
2789f11ffb7Safresh1 void
2799f11ffb7Safresh1 bucket_ratio(rhv)
2809f11ffb7Safresh1         SV* rhv
2819f11ffb7Safresh1     PROTOTYPE: \%
2829f11ffb7Safresh1     PPCODE:
2839f11ffb7Safresh1 {
2849f11ffb7Safresh1     if (SvROK(rhv)) {
2859f11ffb7Safresh1         rhv= SvRV(rhv);
2869f11ffb7Safresh1         if ( SvTYPE(rhv)==SVt_PVHV ) {
287eac174f2Safresh1 #if PERL_VERSION_LT(5,25,0)
2889f11ffb7Safresh1             SV *ret= Perl_hv_scalar(aTHX_ (HV*)rhv);
2899f11ffb7Safresh1 #else
2909f11ffb7Safresh1             SV *ret= Perl_hv_bucket_ratio(aTHX_ (HV*)rhv);
2919f11ffb7Safresh1 #endif
2929f11ffb7Safresh1             ST(0)= ret;
2939f11ffb7Safresh1             XSRETURN(1);
2949f11ffb7Safresh1         }
2959f11ffb7Safresh1     }
2969f11ffb7Safresh1     XSRETURN_UNDEF;
2979f11ffb7Safresh1 }
2989f11ffb7Safresh1 
2999f11ffb7Safresh1 void
3009f11ffb7Safresh1 num_buckets(rhv)
3019f11ffb7Safresh1         SV* rhv
3029f11ffb7Safresh1     PROTOTYPE: \%
3039f11ffb7Safresh1     PPCODE:
3049f11ffb7Safresh1 {
3059f11ffb7Safresh1     if (SvROK(rhv)) {
3069f11ffb7Safresh1         rhv= SvRV(rhv);
3079f11ffb7Safresh1         if ( SvTYPE(rhv)==SVt_PVHV ) {
3089f11ffb7Safresh1             XSRETURN_UV(HvMAX((HV*)rhv)+1);
3099f11ffb7Safresh1         }
3109f11ffb7Safresh1     }
3119f11ffb7Safresh1     XSRETURN_UNDEF;
3129f11ffb7Safresh1 }
3139f11ffb7Safresh1 
3149f11ffb7Safresh1 void
3159f11ffb7Safresh1 used_buckets(rhv)
3169f11ffb7Safresh1         SV* rhv
3179f11ffb7Safresh1     PROTOTYPE: \%
3189f11ffb7Safresh1     PPCODE:
3199f11ffb7Safresh1 {
3209f11ffb7Safresh1     if (SvROK(rhv)) {
3219f11ffb7Safresh1         rhv= SvRV(rhv);
3229f11ffb7Safresh1         if ( SvTYPE(rhv)==SVt_PVHV ) {
3239f11ffb7Safresh1             XSRETURN_UV(HvFILL((HV*)rhv));
3249f11ffb7Safresh1         }
3259f11ffb7Safresh1     }
3269f11ffb7Safresh1     XSRETURN_UNDEF;
3279f11ffb7Safresh1 }
3289f11ffb7Safresh1 
329