1package Hash::Util;
2
3require 5.007003;
4use strict;
5use Carp;
6use warnings;
7no warnings 'uninitialized';
8use warnings::register;
9use Scalar::Util qw(reftype);
10
11require Exporter;
12our @ISA        = qw(Exporter);
13our @EXPORT_OK  = qw(
14                     fieldhash fieldhashes
15
16                     all_keys
17                     lock_keys unlock_keys
18                     lock_value unlock_value
19                     lock_hash unlock_hash
20                     lock_keys_plus
21                     hash_locked hash_unlocked
22                     hashref_locked hashref_unlocked
23                     hidden_keys legal_keys
24
25                     lock_ref_keys unlock_ref_keys
26                     lock_ref_value unlock_ref_value
27                     lock_hashref unlock_hashref
28                     lock_ref_keys_plus
29                     hidden_ref_keys legal_ref_keys
30
31                     hash_seed hash_value hv_store
32                     bucket_stats bucket_info bucket_array
33                     lock_hash_recurse unlock_hash_recurse
34
35                     hash_traversal_mask
36                    );
37our $VERSION = '0.16';
38require XSLoader;
39XSLoader::load();
40
41sub import {
42    my $class = shift;
43    if ( grep /fieldhash/, @_ ) {
44        require Hash::Util::FieldHash;
45        Hash::Util::FieldHash->import(':all'); # for re-export
46    }
47    unshift @_, $class;
48    goto &Exporter::import;
49}
50
51
52=head1 NAME
53
54Hash::Util - A selection of general-utility hash subroutines
55
56=head1 SYNOPSIS
57
58  # Restricted hashes
59
60  use Hash::Util qw(
61                     fieldhash fieldhashes
62
63                     all_keys
64                     lock_keys unlock_keys
65                     lock_value unlock_value
66                     lock_hash unlock_hash
67                     lock_keys_plus
68                     hash_locked hash_unlocked
69                     hashref_locked hashref_unlocked
70                     hidden_keys legal_keys
71
72                     lock_ref_keys unlock_ref_keys
73                     lock_ref_value unlock_ref_value
74                     lock_hashref unlock_hashref
75                     lock_ref_keys_plus
76                     hidden_ref_keys legal_ref_keys
77
78                     hash_seed hash_value hv_store
79                     bucket_stats bucket_info bucket_array
80                     lock_hash_recurse unlock_hash_recurse
81
82                     hash_traversal_mask
83                   );
84
85  %hash = (foo => 42, bar => 23);
86  # Ways to restrict a hash
87  lock_keys(%hash);
88  lock_keys(%hash, @keyset);
89  lock_keys_plus(%hash, @additional_keys);
90
91  # Ways to inspect the properties of a restricted hash
92  my @legal = legal_keys(%hash);
93  my @hidden = hidden_keys(%hash);
94  my $ref = all_keys(%hash,@keys,@hidden);
95  my $is_locked = hash_locked(%hash);
96
97  # Remove restrictions on the hash
98  unlock_keys(%hash);
99
100  # Lock individual values in a hash
101  lock_value  (%hash, 'foo');
102  unlock_value(%hash, 'foo');
103
104  # Ways to change the restrictions on both keys and values
105  lock_hash  (%hash);
106  unlock_hash(%hash);
107
108  my $hashes_are_randomised = hash_seed() != 0;
109
110  my $int_hash_value = hash_value( 'string' );
111
112  my $mask= hash_traversal_mask(%hash);
113
114  hash_traversal_mask(%hash,1234);
115
116=head1 DESCRIPTION
117
118C<Hash::Util> and C<Hash::Util::FieldHash> contain special functions
119for manipulating hashes that don't really warrant a keyword.
120
121C<Hash::Util> contains a set of functions that support
122L<restricted hashes|/"Restricted hashes">. These are described in
123this document.  C<Hash::Util::FieldHash> contains an (unrelated)
124set of functions that support the use of hashes in
125I<inside-out classes>, described in L<Hash::Util::FieldHash>.
126
127By default C<Hash::Util> does not export anything.
128
129=head2 Restricted hashes
130
1315.8.0 introduces the ability to restrict a hash to a certain set of
132keys.  No keys outside of this set can be added.  It also introduces
133the ability to lock an individual key so it cannot be deleted and the
134ability to ensure that an individual value cannot be changed.
135
136This is intended to largely replace the deprecated pseudo-hashes.
137
138=over 4
139
140=item B<lock_keys>
141
142=item B<unlock_keys>
143
144  lock_keys(%hash);
145  lock_keys(%hash, @keys);
146
147Restricts the given %hash's set of keys to @keys.  If @keys is not
148given it restricts it to its current keyset.  No more keys can be
149added. delete() and exists() will still work, but will not alter
150the set of allowed keys. B<Note>: the current implementation prevents
151the hash from being bless()ed while it is in a locked state. Any attempt
152to do so will raise an exception. Of course you can still bless()
153the hash before you call lock_keys() so this shouldn't be a problem.
154
155  unlock_keys(%hash);
156
157Removes the restriction on the %hash's keyset.
158
159B<Note> that if any of the values of the hash have been locked they will not
160be unlocked after this sub executes.
161
162Both routines return a reference to the hash operated on.
163
164=cut
165
166sub lock_ref_keys {
167    my($hash, @keys) = @_;
168
169    Internals::hv_clear_placeholders %$hash;
170    if( @keys ) {
171        my %keys = map { ($_ => 1) } @keys;
172        my %original_keys = map { ($_ => 1) } keys %$hash;
173        foreach my $k (keys %original_keys) {
174            croak "Hash has key '$k' which is not in the new key set"
175              unless $keys{$k};
176        }
177
178        foreach my $k (@keys) {
179            $hash->{$k} = undef unless exists $hash->{$k};
180        }
181        Internals::SvREADONLY %$hash, 1;
182
183        foreach my $k (@keys) {
184            delete $hash->{$k} unless $original_keys{$k};
185        }
186    }
187    else {
188        Internals::SvREADONLY %$hash, 1;
189    }
190
191    return $hash;
192}
193
194sub unlock_ref_keys {
195    my $hash = shift;
196
197    Internals::SvREADONLY %$hash, 0;
198    return $hash;
199}
200
201sub   lock_keys (\%;@) {   lock_ref_keys(@_) }
202sub unlock_keys (\%)   { unlock_ref_keys(@_) }
203
204=item B<lock_keys_plus>
205
206  lock_keys_plus(%hash,@additional_keys)
207
208Similar to C<lock_keys()>, with the difference being that the optional key list
209specifies keys that may or may not be already in the hash. Essentially this is
210an easier way to say
211
212  lock_keys(%hash,@additional_keys,keys %hash);
213
214Returns a reference to %hash
215
216=cut
217
218
219sub lock_ref_keys_plus {
220    my ($hash,@keys) = @_;
221    my @delete;
222    Internals::hv_clear_placeholders(%$hash);
223    foreach my $key (@keys) {
224        unless (exists($hash->{$key})) {
225            $hash->{$key}=undef;
226            push @delete,$key;
227        }
228    }
229    Internals::SvREADONLY(%$hash,1);
230    delete @{$hash}{@delete};
231    return $hash
232}
233
234sub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) }
235
236
237=item B<lock_value>
238
239=item B<unlock_value>
240
241  lock_value  (%hash, $key);
242  unlock_value(%hash, $key);
243
244Locks and unlocks the value for an individual key of a hash.  The value of a
245locked key cannot be changed.
246
247Unless %hash has already been locked the key/value could be deleted
248regardless of this setting.
249
250Returns a reference to the %hash.
251
252=cut
253
254sub lock_ref_value {
255    my($hash, $key) = @_;
256    # I'm doubtful about this warning, as it seems not to be true.
257    # Marking a value in the hash as RO is useful, regardless
258    # of the status of the hash itself.
259    carp "Cannot usefully lock values in an unlocked hash"
260      if !Internals::SvREADONLY(%$hash) && warnings::enabled;
261    Internals::SvREADONLY $hash->{$key}, 1;
262    return $hash
263}
264
265sub unlock_ref_value {
266    my($hash, $key) = @_;
267    Internals::SvREADONLY $hash->{$key}, 0;
268    return $hash
269}
270
271sub   lock_value (\%$) {   lock_ref_value(@_) }
272sub unlock_value (\%$) { unlock_ref_value(@_) }
273
274
275=item B<lock_hash>
276
277=item B<unlock_hash>
278
279    lock_hash(%hash);
280
281lock_hash() locks an entire hash, making all keys and values read-only.
282No value can be changed, no keys can be added or deleted.
283
284    unlock_hash(%hash);
285
286unlock_hash() does the opposite of lock_hash().  All keys and values
287are made writable.  All values can be changed and keys can be added
288and deleted.
289
290Returns a reference to the %hash.
291
292=cut
293
294sub lock_hashref {
295    my $hash = shift;
296
297    lock_ref_keys($hash);
298
299    foreach my $value (values %$hash) {
300        Internals::SvREADONLY($value,1);
301    }
302
303    return $hash;
304}
305
306sub unlock_hashref {
307    my $hash = shift;
308
309    foreach my $value (values %$hash) {
310        Internals::SvREADONLY($value, 0);
311    }
312
313    unlock_ref_keys($hash);
314
315    return $hash;
316}
317
318sub   lock_hash (\%) {   lock_hashref(@_) }
319sub unlock_hash (\%) { unlock_hashref(@_) }
320
321=item B<lock_hash_recurse>
322
323=item B<unlock_hash_recurse>
324
325    lock_hash_recurse(%hash);
326
327lock_hash() locks an entire hash and any hashes it references recursively,
328making all keys and values read-only. No value can be changed, no keys can
329be added or deleted.
330
331This method B<only> recurses into hashes that are referenced by another hash.
332Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of
333Hashes (HoAoH) will only have the top hash restricted.
334
335    unlock_hash_recurse(%hash);
336
337unlock_hash_recurse() does the opposite of lock_hash_recurse().  All keys and
338values are made writable.  All values can be changed and keys can be added
339and deleted. Identical recursion restrictions apply as to lock_hash_recurse().
340
341Returns a reference to the %hash.
342
343=cut
344
345sub lock_hashref_recurse {
346    my $hash = shift;
347
348    lock_ref_keys($hash);
349    foreach my $value (values %$hash) {
350        my $type = reftype($value);
351        if (defined($type) and $type eq 'HASH') {
352            lock_hashref_recurse($value);
353        }
354        Internals::SvREADONLY($value,1);
355    }
356    return $hash
357}
358
359sub unlock_hashref_recurse {
360    my $hash = shift;
361
362    foreach my $value (values %$hash) {
363        my $type = reftype($value);
364        if (defined($type) and $type eq 'HASH') {
365            unlock_hashref_recurse($value);
366        }
367        Internals::SvREADONLY($value,1);
368    }
369    unlock_ref_keys($hash);
370    return $hash;
371}
372
373sub   lock_hash_recurse (\%) {   lock_hashref_recurse(@_) }
374sub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) }
375
376=item B<hashref_locked>
377
378=item B<hash_locked>
379
380  hashref_locked(\%hash) and print "Hash is locked!\n";
381  hash_locked(%hash) and print "Hash is locked!\n";
382
383Returns true if the hash and its keys are locked.
384
385=cut
386
387sub hashref_locked {
388    my $hash=shift;
389    Internals::SvREADONLY(%$hash);
390}
391
392sub hash_locked(\%) { hashref_locked(@_) }
393
394=item B<hashref_unlocked>
395
396=item B<hash_unlocked>
397
398  hashref_unlocked(\%hash) and print "Hash is unlocked!\n";
399  hash_unlocked(%hash) and print "Hash is unlocked!\n";
400
401Returns true if the hash and its keys are unlocked.
402
403=cut
404
405sub hashref_unlocked {
406    my $hash=shift;
407    !Internals::SvREADONLY(%$hash);
408}
409
410sub hash_unlocked(\%) { hashref_unlocked(@_) }
411
412=for demerphqs_editor
413sub legal_ref_keys{}
414sub hidden_ref_keys{}
415sub all_keys{}
416
417=cut
418
419sub legal_keys(\%) { legal_ref_keys(@_)  }
420sub hidden_keys(\%){ hidden_ref_keys(@_) }
421
422=item B<legal_keys>
423
424  my @keys = legal_keys(%hash);
425
426Returns the list of the keys that are legal in a restricted hash.
427In the case of an unrestricted hash this is identical to calling
428keys(%hash).
429
430=item B<hidden_keys>
431
432  my @keys = hidden_keys(%hash);
433
434Returns the list of the keys that are legal in a restricted hash but
435do not have a value associated to them. Thus if 'foo' is a
436"hidden" key of the %hash it will return false for both C<defined>
437and C<exists> tests.
438
439In the case of an unrestricted hash this will return an empty list.
440
441B<NOTE> this is an experimental feature that is heavily dependent
442on the current implementation of restricted hashes. Should the
443implementation change, this routine may become meaningless, in which
444case it will return an empty list.
445
446=item B<all_keys>
447
448  all_keys(%hash,@keys,@hidden);
449
450Populates the arrays @keys with the all the keys that would pass
451an C<exists> tests, and populates @hidden with the remaining legal
452keys that have not been utilized.
453
454Returns a reference to the hash.
455
456In the case of an unrestricted hash this will be equivalent to
457
458  $ref = do {
459      @keys = keys %hash;
460      @hidden = ();
461      \%hash
462  };
463
464B<NOTE> this is an experimental feature that is heavily dependent
465on the current implementation of restricted hashes. Should the
466implementation change this routine may become meaningless in which
467case it will behave identically to how it would behave on an
468unrestricted hash.
469
470=item B<hash_seed>
471
472    my $hash_seed = hash_seed();
473
474hash_seed() returns the seed bytes used to randomise hash ordering.
475
476B<Note that the hash seed is sensitive information>: by knowing it one
477can craft a denial-of-service attack against Perl code, even remotely,
478see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
479B<Do not disclose the hash seed> to people who don't need to know it.
480See also L<perlrun/PERL_HASH_SEED_DEBUG>.
481
482Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
483which may be of nearly any size as determined by the hash function your
484Perl has been built with. Possible sizes may be but are not limited to
4854 bytes (for most hash algorithms) and 16 bytes (for siphash).
486
487=item B<hash_value>
488
489    my $hash_value = hash_value($string);
490
491hash_value() returns the current perl's internal hash value for a given
492string.
493
494Returns a 32 bit integer representing the hash value of the string passed
495in. This value is only reliable for the lifetime of the process. It may
496be different depending on invocation, environment variables,  perl version,
497architectures, and build options.
498
499B<Note that the hash value of a given string is sensitive information>:
500by knowing it one can deduce the hash seed which in turn can allow one to
501craft a denial-of-service attack against Perl code, even remotely,
502see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
503B<Do not disclose the hash value of a string> to people who don't need to
504know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
505
506=item B<bucket_info>
507
508Return a set of basic information about a hash.
509
510    my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
511
512Fields are as follows:
513
514    0: Number of keys in the hash
515    1: Number of buckets in the hash
516    2: Number of used buckets in the hash
517    rest : list of counts, Kth element is the number of buckets
518           with K keys in it.
519
520See also bucket_stats() and bucket_array().
521
522=item B<bucket_stats>
523
524Returns a list of statistics about a hash.
525
526    my ($keys, buckets, $used, $utilization_ratio, $collision_pct,
527        $mean, $stddev, @length_counts) = bucket_info($hashref);
528
529
530Fields are as follows:
531
532
533    0: Number of keys in the hash
534    1: Number of buckets in the hash
535    2: Number of used buckets in the hash
536    3: Hash Quality Score
537    4: Percent of buckets used
538    5: Percent of keys which are in collision
539    6: Average bucket length
540    7: Standard Deviation of bucket lengths.
541    rest : list of counts, Kth element is the number of buckets
542           with K keys in it.
543
544See also bucket_info() and bucket_array().
545
546Note that Hash Quality Score would be 1 for an ideal hash, numbers
547close to and below 1 indicate good hashing, and number significantly
548above indicate a poor score. In practice it should be around 0.95 to 1.05.
549It is defined as:
550
551 $score= sum( $count[$length] * ($length * ($length + 1) / 2) )
552            /
553            ( ( $keys / 2 * $buckets ) *
554              ( $keys + ( 2 * $buckets ) - 1 ) )
555
556The formula is from the Red Dragon book (reformulated to use the data available)
557and is documented at L<http://www.strchr.com/hash_functions>
558
559=item B<bucket_array>
560
561    my $array= bucket_array(\%hash);
562
563Returns a packed representation of the bucket array associated with a hash. Each element
564of the array is either an integer K, in which case it represents K empty buckets, or
565a reference to another array which contains the keys that are in that bucket.
566
567B<Note that the information returned by bucket_array is sensitive information>:
568by knowing it one can directly attack perl's hash function which in turn may allow
569one to craft a denial-of-service attack against Perl code, even remotely,
570see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
571B<Do not disclose the output of this function> to people who don't need to
572know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
573for  debugging and diagnostics purposes only, it is hard to imagine a reason why it
574would be used in production code.
575
576=cut
577
578
579sub bucket_stats {
580    my ($hash) = @_;
581    my ($keys, $buckets, $used, @length_counts) = bucket_info($hash);
582    my $sum;
583    my $score;
584    for (0 .. $#length_counts) {
585        $sum += ($length_counts[$_] * $_);
586        $score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 );
587    }
588    $score = $score /
589             (( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 ))
590                 if $keys;
591    my $mean= $sum/$buckets;
592    $sum= 0;
593    $sum += ($length_counts[$_] * (($_-$mean)**2)) for 0 .. $#length_counts;
594
595    my $stddev= sqrt($sum/$buckets);
596    return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
597}
598
599=item B<hv_store>
600
601  my $sv = 0;
602  hv_store(%hash,$key,$sv) or die "Failed to alias!";
603  $hash{$key} = 1;
604  print $sv; # prints 1
605
606Stores an alias to a variable in a hash instead of copying the value.
607
608=item B<hash_traversal_mask>
609
610As of Perl 5.18 every hash has its own hash traversal order, and this order
611changes every time a new element is inserted into the hash. This functionality
612is provided by maintaining an unsigned integer mask (U32) which is xor'ed
613with the actual bucket id during a traversal of the hash buckets using keys(),
614values() or each().
615
616You can use this subroutine to get and set the traversal mask for a specific
617hash. Setting the mask ensures that a given hash will produce the same key
618order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
619the same key order for the same hash seed and traversal mask, items that
620collide into one bucket may have different orders regardless of this setting.
621
622=back
623
624=head2 Operating on references to hashes.
625
626Most subroutines documented in this module have equivalent versions
627that operate on references to hashes instead of native hashes.
628The following is a list of these subs. They are identical except
629in name and in that instead of taking a %hash they take a $hashref,
630and additionally are not prototyped.
631
632=over 4
633
634=item lock_ref_keys
635
636=item unlock_ref_keys
637
638=item lock_ref_keys_plus
639
640=item lock_ref_value
641
642=item unlock_ref_value
643
644=item lock_hashref
645
646=item unlock_hashref
647
648=item lock_hashref_recurse
649
650=item unlock_hashref_recurse
651
652=item hash_ref_unlocked
653
654=item legal_ref_keys
655
656=item hidden_ref_keys
657
658=back
659
660=head1 CAVEATS
661
662Note that the trapping of the restricted operations is not atomic:
663for example
664
665    eval { %hash = (illegal_key => 1) }
666
667leaves the C<%hash> empty rather than with its original contents.
668
669=head1 BUGS
670
671The interface exposed by this module is very close to the current
672implementation of restricted hashes. Over time it is expected that
673this behavior will be extended and the interface abstracted further.
674
675=head1 AUTHOR
676
677Michael G Schwern <schwern@pobox.com> on top of code by Nick
678Ing-Simmons and Jeffrey Friedl.
679
680hv_store() is from Array::RefElem, Copyright 2000 Gisle Aas.
681
682Additional code by Yves Orton.
683
684=head1 SEE ALSO
685
686L<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">.
687
688L<Hash::Util::FieldHash>.
689
690=cut
691
6921;
693