1package Hash::Util; 2 3require 5.007003; 4use strict; 5use Carp; 6use warnings; 7no warnings 'uninitialized'; 8use warnings::register; 9use Scalar::Util qw(reftype); 10 11require Exporter; 12our @ISA = qw(Exporter); 13our @EXPORT_OK = qw( 14 fieldhash fieldhashes 15 16 all_keys 17 lock_keys unlock_keys 18 lock_value unlock_value 19 lock_hash unlock_hash 20 lock_keys_plus 21 hash_locked hash_unlocked 22 hashref_locked hashref_unlocked 23 hidden_keys legal_keys 24 25 lock_ref_keys unlock_ref_keys 26 lock_ref_value unlock_ref_value 27 lock_hashref unlock_hashref 28 lock_ref_keys_plus 29 hidden_ref_keys legal_ref_keys 30 31 hash_seed hash_value hv_store 32 bucket_stats bucket_info bucket_array 33 lock_hash_recurse unlock_hash_recurse 34 35 hash_traversal_mask 36 ); 37our $VERSION = '0.16'; 38require XSLoader; 39XSLoader::load(); 40 41sub import { 42 my $class = shift; 43 if ( grep /fieldhash/, @_ ) { 44 require Hash::Util::FieldHash; 45 Hash::Util::FieldHash->import(':all'); # for re-export 46 } 47 unshift @_, $class; 48 goto &Exporter::import; 49} 50 51 52=head1 NAME 53 54Hash::Util - A selection of general-utility hash subroutines 55 56=head1 SYNOPSIS 57 58 # Restricted hashes 59 60 use Hash::Util qw( 61 fieldhash fieldhashes 62 63 all_keys 64 lock_keys unlock_keys 65 lock_value unlock_value 66 lock_hash unlock_hash 67 lock_keys_plus 68 hash_locked hash_unlocked 69 hashref_locked hashref_unlocked 70 hidden_keys legal_keys 71 72 lock_ref_keys unlock_ref_keys 73 lock_ref_value unlock_ref_value 74 lock_hashref unlock_hashref 75 lock_ref_keys_plus 76 hidden_ref_keys legal_ref_keys 77 78 hash_seed hash_value hv_store 79 bucket_stats bucket_info bucket_array 80 lock_hash_recurse unlock_hash_recurse 81 82 hash_traversal_mask 83 ); 84 85 %hash = (foo => 42, bar => 23); 86 # Ways to restrict a hash 87 lock_keys(%hash); 88 lock_keys(%hash, @keyset); 89 lock_keys_plus(%hash, @additional_keys); 90 91 # Ways to inspect the properties of a restricted hash 92 my @legal = legal_keys(%hash); 93 my @hidden = hidden_keys(%hash); 94 my $ref = all_keys(%hash,@keys,@hidden); 95 my $is_locked = hash_locked(%hash); 96 97 # Remove restrictions on the hash 98 unlock_keys(%hash); 99 100 # Lock individual values in a hash 101 lock_value (%hash, 'foo'); 102 unlock_value(%hash, 'foo'); 103 104 # Ways to change the restrictions on both keys and values 105 lock_hash (%hash); 106 unlock_hash(%hash); 107 108 my $hashes_are_randomised = hash_seed() != 0; 109 110 my $int_hash_value = hash_value( 'string' ); 111 112 my $mask= hash_traversal_mask(%hash); 113 114 hash_traversal_mask(%hash,1234); 115 116=head1 DESCRIPTION 117 118C<Hash::Util> and C<Hash::Util::FieldHash> contain special functions 119for manipulating hashes that don't really warrant a keyword. 120 121C<Hash::Util> contains a set of functions that support 122L<restricted hashes|/"Restricted hashes">. These are described in 123this document. C<Hash::Util::FieldHash> contains an (unrelated) 124set of functions that support the use of hashes in 125I<inside-out classes>, described in L<Hash::Util::FieldHash>. 126 127By default C<Hash::Util> does not export anything. 128 129=head2 Restricted hashes 130 1315.8.0 introduces the ability to restrict a hash to a certain set of 132keys. No keys outside of this set can be added. It also introduces 133the ability to lock an individual key so it cannot be deleted and the 134ability to ensure that an individual value cannot be changed. 135 136This is intended to largely replace the deprecated pseudo-hashes. 137 138=over 4 139 140=item B<lock_keys> 141 142=item B<unlock_keys> 143 144 lock_keys(%hash); 145 lock_keys(%hash, @keys); 146 147Restricts the given %hash's set of keys to @keys. If @keys is not 148given it restricts it to its current keyset. No more keys can be 149added. delete() and exists() will still work, but will not alter 150the set of allowed keys. B<Note>: the current implementation prevents 151the hash from being bless()ed while it is in a locked state. Any attempt 152to do so will raise an exception. Of course you can still bless() 153the hash before you call lock_keys() so this shouldn't be a problem. 154 155 unlock_keys(%hash); 156 157Removes the restriction on the %hash's keyset. 158 159B<Note> that if any of the values of the hash have been locked they will not 160be unlocked after this sub executes. 161 162Both routines return a reference to the hash operated on. 163 164=cut 165 166sub lock_ref_keys { 167 my($hash, @keys) = @_; 168 169 Internals::hv_clear_placeholders %$hash; 170 if( @keys ) { 171 my %keys = map { ($_ => 1) } @keys; 172 my %original_keys = map { ($_ => 1) } keys %$hash; 173 foreach my $k (keys %original_keys) { 174 croak "Hash has key '$k' which is not in the new key set" 175 unless $keys{$k}; 176 } 177 178 foreach my $k (@keys) { 179 $hash->{$k} = undef unless exists $hash->{$k}; 180 } 181 Internals::SvREADONLY %$hash, 1; 182 183 foreach my $k (@keys) { 184 delete $hash->{$k} unless $original_keys{$k}; 185 } 186 } 187 else { 188 Internals::SvREADONLY %$hash, 1; 189 } 190 191 return $hash; 192} 193 194sub unlock_ref_keys { 195 my $hash = shift; 196 197 Internals::SvREADONLY %$hash, 0; 198 return $hash; 199} 200 201sub lock_keys (\%;@) { lock_ref_keys(@_) } 202sub unlock_keys (\%) { unlock_ref_keys(@_) } 203 204=item B<lock_keys_plus> 205 206 lock_keys_plus(%hash,@additional_keys) 207 208Similar to C<lock_keys()>, with the difference being that the optional key list 209specifies keys that may or may not be already in the hash. Essentially this is 210an easier way to say 211 212 lock_keys(%hash,@additional_keys,keys %hash); 213 214Returns a reference to %hash 215 216=cut 217 218 219sub lock_ref_keys_plus { 220 my ($hash,@keys) = @_; 221 my @delete; 222 Internals::hv_clear_placeholders(%$hash); 223 foreach my $key (@keys) { 224 unless (exists($hash->{$key})) { 225 $hash->{$key}=undef; 226 push @delete,$key; 227 } 228 } 229 Internals::SvREADONLY(%$hash,1); 230 delete @{$hash}{@delete}; 231 return $hash 232} 233 234sub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) } 235 236 237=item B<lock_value> 238 239=item B<unlock_value> 240 241 lock_value (%hash, $key); 242 unlock_value(%hash, $key); 243 244Locks and unlocks the value for an individual key of a hash. The value of a 245locked key cannot be changed. 246 247Unless %hash has already been locked the key/value could be deleted 248regardless of this setting. 249 250Returns a reference to the %hash. 251 252=cut 253 254sub lock_ref_value { 255 my($hash, $key) = @_; 256 # I'm doubtful about this warning, as it seems not to be true. 257 # Marking a value in the hash as RO is useful, regardless 258 # of the status of the hash itself. 259 carp "Cannot usefully lock values in an unlocked hash" 260 if !Internals::SvREADONLY(%$hash) && warnings::enabled; 261 Internals::SvREADONLY $hash->{$key}, 1; 262 return $hash 263} 264 265sub unlock_ref_value { 266 my($hash, $key) = @_; 267 Internals::SvREADONLY $hash->{$key}, 0; 268 return $hash 269} 270 271sub lock_value (\%$) { lock_ref_value(@_) } 272sub unlock_value (\%$) { unlock_ref_value(@_) } 273 274 275=item B<lock_hash> 276 277=item B<unlock_hash> 278 279 lock_hash(%hash); 280 281lock_hash() locks an entire hash, making all keys and values read-only. 282No value can be changed, no keys can be added or deleted. 283 284 unlock_hash(%hash); 285 286unlock_hash() does the opposite of lock_hash(). All keys and values 287are made writable. All values can be changed and keys can be added 288and deleted. 289 290Returns a reference to the %hash. 291 292=cut 293 294sub lock_hashref { 295 my $hash = shift; 296 297 lock_ref_keys($hash); 298 299 foreach my $value (values %$hash) { 300 Internals::SvREADONLY($value,1); 301 } 302 303 return $hash; 304} 305 306sub unlock_hashref { 307 my $hash = shift; 308 309 foreach my $value (values %$hash) { 310 Internals::SvREADONLY($value, 0); 311 } 312 313 unlock_ref_keys($hash); 314 315 return $hash; 316} 317 318sub lock_hash (\%) { lock_hashref(@_) } 319sub unlock_hash (\%) { unlock_hashref(@_) } 320 321=item B<lock_hash_recurse> 322 323=item B<unlock_hash_recurse> 324 325 lock_hash_recurse(%hash); 326 327lock_hash() locks an entire hash and any hashes it references recursively, 328making all keys and values read-only. No value can be changed, no keys can 329be added or deleted. 330 331This method B<only> recurses into hashes that are referenced by another hash. 332Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of 333Hashes (HoAoH) will only have the top hash restricted. 334 335 unlock_hash_recurse(%hash); 336 337unlock_hash_recurse() does the opposite of lock_hash_recurse(). All keys and 338values are made writable. All values can be changed and keys can be added 339and deleted. Identical recursion restrictions apply as to lock_hash_recurse(). 340 341Returns a reference to the %hash. 342 343=cut 344 345sub lock_hashref_recurse { 346 my $hash = shift; 347 348 lock_ref_keys($hash); 349 foreach my $value (values %$hash) { 350 my $type = reftype($value); 351 if (defined($type) and $type eq 'HASH') { 352 lock_hashref_recurse($value); 353 } 354 Internals::SvREADONLY($value,1); 355 } 356 return $hash 357} 358 359sub unlock_hashref_recurse { 360 my $hash = shift; 361 362 foreach my $value (values %$hash) { 363 my $type = reftype($value); 364 if (defined($type) and $type eq 'HASH') { 365 unlock_hashref_recurse($value); 366 } 367 Internals::SvREADONLY($value,1); 368 } 369 unlock_ref_keys($hash); 370 return $hash; 371} 372 373sub lock_hash_recurse (\%) { lock_hashref_recurse(@_) } 374sub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) } 375 376=item B<hashref_locked> 377 378=item B<hash_locked> 379 380 hashref_locked(\%hash) and print "Hash is locked!\n"; 381 hash_locked(%hash) and print "Hash is locked!\n"; 382 383Returns true if the hash and its keys are locked. 384 385=cut 386 387sub hashref_locked { 388 my $hash=shift; 389 Internals::SvREADONLY(%$hash); 390} 391 392sub hash_locked(\%) { hashref_locked(@_) } 393 394=item B<hashref_unlocked> 395 396=item B<hash_unlocked> 397 398 hashref_unlocked(\%hash) and print "Hash is unlocked!\n"; 399 hash_unlocked(%hash) and print "Hash is unlocked!\n"; 400 401Returns true if the hash and its keys are unlocked. 402 403=cut 404 405sub hashref_unlocked { 406 my $hash=shift; 407 !Internals::SvREADONLY(%$hash); 408} 409 410sub hash_unlocked(\%) { hashref_unlocked(@_) } 411 412=for demerphqs_editor 413sub legal_ref_keys{} 414sub hidden_ref_keys{} 415sub all_keys{} 416 417=cut 418 419sub legal_keys(\%) { legal_ref_keys(@_) } 420sub hidden_keys(\%){ hidden_ref_keys(@_) } 421 422=item B<legal_keys> 423 424 my @keys = legal_keys(%hash); 425 426Returns the list of the keys that are legal in a restricted hash. 427In the case of an unrestricted hash this is identical to calling 428keys(%hash). 429 430=item B<hidden_keys> 431 432 my @keys = hidden_keys(%hash); 433 434Returns the list of the keys that are legal in a restricted hash but 435do not have a value associated to them. Thus if 'foo' is a 436"hidden" key of the %hash it will return false for both C<defined> 437and C<exists> tests. 438 439In the case of an unrestricted hash this will return an empty list. 440 441B<NOTE> this is an experimental feature that is heavily dependent 442on the current implementation of restricted hashes. Should the 443implementation change, this routine may become meaningless, in which 444case it will return an empty list. 445 446=item B<all_keys> 447 448 all_keys(%hash,@keys,@hidden); 449 450Populates the arrays @keys with the all the keys that would pass 451an C<exists> tests, and populates @hidden with the remaining legal 452keys that have not been utilized. 453 454Returns a reference to the hash. 455 456In the case of an unrestricted hash this will be equivalent to 457 458 $ref = do { 459 @keys = keys %hash; 460 @hidden = (); 461 \%hash 462 }; 463 464B<NOTE> this is an experimental feature that is heavily dependent 465on the current implementation of restricted hashes. Should the 466implementation change this routine may become meaningless in which 467case it will behave identically to how it would behave on an 468unrestricted hash. 469 470=item B<hash_seed> 471 472 my $hash_seed = hash_seed(); 473 474hash_seed() returns the seed bytes used to randomise hash ordering. 475 476B<Note that the hash seed is sensitive information>: by knowing it one 477can craft a denial-of-service attack against Perl code, even remotely, 478see L<perlsec/"Algorithmic Complexity Attacks"> for more information. 479B<Do not disclose the hash seed> to people who don't need to know it. 480See also L<perlrun/PERL_HASH_SEED_DEBUG>. 481 482Prior to Perl 5.17.6 this function returned a UV, it now returns a string, 483which may be of nearly any size as determined by the hash function your 484Perl has been built with. Possible sizes may be but are not limited to 4854 bytes (for most hash algorithms) and 16 bytes (for siphash). 486 487=item B<hash_value> 488 489 my $hash_value = hash_value($string); 490 491hash_value() returns the current perl's internal hash value for a given 492string. 493 494Returns a 32 bit integer representing the hash value of the string passed 495in. This value is only reliable for the lifetime of the process. It may 496be different depending on invocation, environment variables, perl version, 497architectures, and build options. 498 499B<Note that the hash value of a given string is sensitive information>: 500by knowing it one can deduce the hash seed which in turn can allow one to 501craft a denial-of-service attack against Perl code, even remotely, 502see L<perlsec/"Algorithmic Complexity Attacks"> for more information. 503B<Do not disclose the hash value of a string> to people who don't need to 504know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. 505 506=item B<bucket_info> 507 508Return a set of basic information about a hash. 509 510 my ($keys, $buckets, $used, @length_counts)= bucket_info($hash); 511 512Fields are as follows: 513 514 0: Number of keys in the hash 515 1: Number of buckets in the hash 516 2: Number of used buckets in the hash 517 rest : list of counts, Kth element is the number of buckets 518 with K keys in it. 519 520See also bucket_stats() and bucket_array(). 521 522=item B<bucket_stats> 523 524Returns a list of statistics about a hash. 525 526 my ($keys, buckets, $used, $utilization_ratio, $collision_pct, 527 $mean, $stddev, @length_counts) = bucket_info($hashref); 528 529 530Fields are as follows: 531 532 533 0: Number of keys in the hash 534 1: Number of buckets in the hash 535 2: Number of used buckets in the hash 536 3: Hash Quality Score 537 4: Percent of buckets used 538 5: Percent of keys which are in collision 539 6: Average bucket length 540 7: Standard Deviation of bucket lengths. 541 rest : list of counts, Kth element is the number of buckets 542 with K keys in it. 543 544See also bucket_info() and bucket_array(). 545 546Note that Hash Quality Score would be 1 for an ideal hash, numbers 547close to and below 1 indicate good hashing, and number significantly 548above indicate a poor score. In practice it should be around 0.95 to 1.05. 549It is defined as: 550 551 $score= sum( $count[$length] * ($length * ($length + 1) / 2) ) 552 / 553 ( ( $keys / 2 * $buckets ) * 554 ( $keys + ( 2 * $buckets ) - 1 ) ) 555 556The formula is from the Red Dragon book (reformulated to use the data available) 557and is documented at L<http://www.strchr.com/hash_functions> 558 559=item B<bucket_array> 560 561 my $array= bucket_array(\%hash); 562 563Returns a packed representation of the bucket array associated with a hash. Each element 564of the array is either an integer K, in which case it represents K empty buckets, or 565a reference to another array which contains the keys that are in that bucket. 566 567B<Note that the information returned by bucket_array is sensitive information>: 568by knowing it one can directly attack perl's hash function which in turn may allow 569one to craft a denial-of-service attack against Perl code, even remotely, 570see L<perlsec/"Algorithmic Complexity Attacks"> for more information. 571B<Do not disclose the output of this function> to people who don't need to 572know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly 573for debugging and diagnostics purposes only, it is hard to imagine a reason why it 574would be used in production code. 575 576=cut 577 578 579sub bucket_stats { 580 my ($hash) = @_; 581 my ($keys, $buckets, $used, @length_counts) = bucket_info($hash); 582 my $sum; 583 my $score; 584 for (0 .. $#length_counts) { 585 $sum += ($length_counts[$_] * $_); 586 $score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 ); 587 } 588 $score = $score / 589 (( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 )) 590 if $keys; 591 my $mean= $sum/$buckets; 592 $sum= 0; 593 $sum += ($length_counts[$_] * (($_-$mean)**2)) for 0 .. $#length_counts; 594 595 my $stddev= sqrt($sum/$buckets); 596 return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : (); 597} 598 599=item B<hv_store> 600 601 my $sv = 0; 602 hv_store(%hash,$key,$sv) or die "Failed to alias!"; 603 $hash{$key} = 1; 604 print $sv; # prints 1 605 606Stores an alias to a variable in a hash instead of copying the value. 607 608=item B<hash_traversal_mask> 609 610As of Perl 5.18 every hash has its own hash traversal order, and this order 611changes every time a new element is inserted into the hash. This functionality 612is provided by maintaining an unsigned integer mask (U32) which is xor'ed 613with the actual bucket id during a traversal of the hash buckets using keys(), 614values() or each(). 615 616You can use this subroutine to get and set the traversal mask for a specific 617hash. Setting the mask ensures that a given hash will produce the same key 618order. B<Note> that this does B<not> guarantee that B<two> hashes will produce 619the same key order for the same hash seed and traversal mask, items that 620collide into one bucket may have different orders regardless of this setting. 621 622=back 623 624=head2 Operating on references to hashes. 625 626Most subroutines documented in this module have equivalent versions 627that operate on references to hashes instead of native hashes. 628The following is a list of these subs. They are identical except 629in name and in that instead of taking a %hash they take a $hashref, 630and additionally are not prototyped. 631 632=over 4 633 634=item lock_ref_keys 635 636=item unlock_ref_keys 637 638=item lock_ref_keys_plus 639 640=item lock_ref_value 641 642=item unlock_ref_value 643 644=item lock_hashref 645 646=item unlock_hashref 647 648=item lock_hashref_recurse 649 650=item unlock_hashref_recurse 651 652=item hash_ref_unlocked 653 654=item legal_ref_keys 655 656=item hidden_ref_keys 657 658=back 659 660=head1 CAVEATS 661 662Note that the trapping of the restricted operations is not atomic: 663for example 664 665 eval { %hash = (illegal_key => 1) } 666 667leaves the C<%hash> empty rather than with its original contents. 668 669=head1 BUGS 670 671The interface exposed by this module is very close to the current 672implementation of restricted hashes. Over time it is expected that 673this behavior will be extended and the interface abstracted further. 674 675=head1 AUTHOR 676 677Michael G Schwern <schwern@pobox.com> on top of code by Nick 678Ing-Simmons and Jeffrey Friedl. 679 680hv_store() is from Array::RefElem, Copyright 2000 Gisle Aas. 681 682Additional code by Yves Orton. 683 684=head1 SEE ALSO 685 686L<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">. 687 688L<Hash::Util::FieldHash>. 689 690=cut 691 6921; 693