1package Tie::RegexpHash; 2 3require 5.005; 4use strict; 5 6use vars qw( $VERSION @ISA ); 7 8$VERSION = '0.17'; 9 10use Carp; 11use Data::Dumper; 12 13# This is what stringified qrs seem to look like. 14# It captures flags in $1 and pattern in $2 15my $SERIALIZE_RE; 16 17# To try to keep this working as far back as 5.5 we're using $] 18if ($] < 5.013_006) { 19 $SERIALIZE_RE = qr/^\(\?([ismx]{0,4})-[ismx]*:(.*)\)$/; 20} 21else { 22 $SERIALIZE_RE = qr/^\(\?\^([ismx]{0,4}(?:-[ismx]{1,4})?):(.*)\)$/; 23} 24 25# This is what the serialized version looks like. 26# It also captures flags in $1 and pattern in $2 27my $DESERIALIZE_RE = qr/^([ismx]{0,4}):(.*)$/; 28 29# Creates a new 'Tie::RegexpHash' object. We use an underlying array rather 30# than a hash because we want to search through the hash keys in the order 31# that they were added. 32# 33# See the _find() and add() routines for more details. 34sub new { 35 my ($class) = @_; 36 37 my $self = { 38 KEYS => [ ], # array of Regexp keys 39 VALUES => [ ], # array of corresponding values 40 COUNT => 0, # the number of hash/key pairs (is this necessary?) 41 }; 42 43 bless $self, $class; 44} 45 46# Embed any modifiers used with qr// in the pattern. 47sub _convert_key { 48 my ($key) = shift; 49 50 my ($flags,$pat) = ($key =~ $SERIALIZE_RE); 51 ($key = qr/(?$flags:$pat)/) if $flags; 52 return $key; 53} 54 55# Sequentially goes through the hash keys for Regexps which match the given 56# key and returns the index. If the hash is empty, or a matching key was not 57# found, returns undef. 58sub _find { 59 my ($self, $key) = @_; 60 61 unless ($self->{COUNT}) { 62 return; 63 } 64 65 if (ref($key) eq 'Regexp') { 66 my $i = 0; 67 $key = _convert_key($key); 68 while (($i < $self->{COUNT}) and ($key ne $self->{KEYS}->[ $i ])) { 69 $i++; 70 } 71 72 if ($i == $self->{COUNT}) { 73 return; 74 } 75 else { 76 return $i; 77 } 78 } 79 else { 80 my $i = 0; 81 while (($i < $self->{COUNT}) and ($key !~ m/$self->{KEYS}->[ $i ]/)) { 82 $i++; 83 } 84 85 if ($i == $self->{COUNT}) { 86 return; 87 } 88 else { 89 return $i; 90 } 91 } 92} 93 94# If a key exists the value will be replaced. (If the Regexps are not the same 95# but match, a warning is displayed.) If the key is new, then a new key/value 96# pair is added. 97sub add { 98 my ($self, $key, $value) = @_; 99 100 ($key = _convert_key($key)) if (ref($key) eq 'Regexp'); 101 102 my $index = _find $self, $key; 103 if (defined($index)) { 104 if ($key ne $self->{KEYS}->[ $index ]) { 105 carp "\'$key\' is not the same as \'", 106 $self->{KEYS}->[$index], "\'"; 107 } 108 $self->{VALUES}->[ $index ] = $value; 109 } 110 else { 111 $index = $self->{COUNT}++; 112 113 ($key = qr/$key/) unless (ref($key) eq 'Regexp'); 114 115 $self->{KEYS}->[ $index ] = $key; 116 $self->{VALUES}->[ $index ] = $value; 117 } 118} 119 120 121# Does a key exist or does it match any Regexp keys? 122sub match_exists { 123 my ($self, $key) = @_; 124 return defined( _find $self, $key ); 125} 126 127# Returns the value of a key or any matches to Regexp keys. 128sub match { 129 my ($self, $key) = @_; 130 131 my $index = _find $self, $key; 132 133 if (defined($index)) { 134 return $self->{VALUES}->[ $index ]; 135 } 136 else { 137 return; 138 } 139} 140 141# Removes a key or Regexp key and associated value from the hash. If the key 142# is not the same as the Regexp, a warning is displayed. 143sub remove { 144 my ($self, $key) = @_; 145 146 ($key = _convert_key($key)) if (ref($key) eq 'Regexp'); 147 148 my $index = _find $self, $key; 149 150 if (defined($index)) { 151 if ($key ne $self->{KEYS}->[ $index ]) { 152 carp "'`$key\' is not the same as '`", 153 $self->{KEYS}->[$index], "\'"; 154 } 155 156 my $value = $self->{VALUES}->[ $index ]; 157 splice @{ $self->{KEYS} }, $index, 1; 158 splice @{ $self->{VALUES} }, $index, 1; 159 $self->{COUNT}--; 160 return $value; 161 } 162 else { 163 carp "Cannot delete a nonexistent key: \`$key\'"; 164 return; 165 } 166} 167 168# Clears the hash. 169sub clear { 170 my ($self) = @_; 171 172 $self->{KEYS} = [ ]; 173 $self->{VALUES} = [ ]; 174 $self->{COUNT} = 0; 175 176} 177 178BEGIN { 179 # make aliases... 180 no strict; 181 *TIEHASH = \ &new; 182 *STORE = \ &add; 183 *EXISTS = \ &match_exists; 184 *FETCH = \ &match; 185 *DELETE = \ &remove; 186 *CLEAR = \ &clear; 187} 188 189# Returns the first key 190sub FIRSTKEY { 191 my ($self) = @_; 192 193 unless ($self->{COUNT}) { 194 return; 195 } 196 197 return $self->{KEYS}->[0]; 198 199} 200 201# Returns the next key 202sub NEXTKEY { 203 my ($self, $lastkey) = @_; 204 205 unless ($self->{COUNT}) { 206 return; 207 } 208 209 my $index = _find $self, $lastkey; 210 211 unless (defined($index)) { 212 confess "Invalid \$lastkey"; 213 } 214 215 $index++; 216 217 if ($index == $self->{COUNT}) { 218 return; 219 } 220 else { 221 return $self->{KEYS}->[ $index ]; 222 } 223} 224 225# serialize object 226sub STORABLE_freeze { 227 my ($self, $cloning) = @_; 228 229 my @keystrings; 230 231 { 232 local *_; 233 @keystrings = map { join(':', ($_ =~ $SERIALIZE_RE)); } @{$self->{KEYS}}; 234 } 235 236 my $sref = { 237 KEYSTRINGS => \@keystrings, 238 VALUES => $self->{VALUES}, 239 COUNT => $self->{COUNT}, 240 }; 241 242 return (0,$sref); 243} 244 245# deserialize 246sub STORABLE_thaw { 247 my($self, $cloning, $serialized, $sref) = @_; 248 249 $self->{KEYS} = [ ]; 250 $self->{VALUES} = $sref->{VALUES}; 251 $self->{COUNT} = $sref->{COUNT}; 252 253 { 254 local *_; 255 @{$self->{KEYS}} = map { 256 my ($flags,$pat) = ($_ =~ $DESERIALIZE_RE); 257 $pat = ($flags) ? "(?$flags:$pat)" : $pat; 258 qr/$pat/; 259 } @{$sref->{KEYSTRINGS}}; 260 } 261} 262 2631; 264__END__ 265 266=head1 NAME 267 268Tie::RegexpHash - Use regular expressions as hash keys 269 270=begin readme 271 272=head1 REQUIREMENTS 273 274L<Tie::RegexpHash> is written for and tested on Perl 5.14.0, but should run as 275far back as Perl 5.005. (Because it uses Regexp C<qr//> variables it cannot run 276on earlier versions of Perl.) 277 278It uses only standard modules. Serialization is supported through Storable, but 279Storable is not required for normal operation. 280 281=head2 Installation 282 283Installation can be done using the traditional Makefile.PL or the newer Build.PL 284methods. 285 286Using Makefile.PL: 287 288 perl Makefile.PL 289 make test 290 make install 291 292(On Windows platforms you should use C<nmake> instead.) 293 294Using Build.PL (if you have Module::Build installed): 295 296 perl Build.PL 297 perl Build test 298 perl Build install 299 300=end readme 301 302=head1 SYNOPSIS 303 304 use Tie::RegexpHash; 305 306 my %hash; 307 308 tie %hash, 'Tie::RegexpHash'; 309 310 $hash{ qr/^5(\s+|-)?gal(\.|lons?)?/i } = '5-GAL'; 311 312 $hash{'5 gal'}; # returns "5-GAL" 313 $hash{'5GAL'}; # returns "5-GAL" 314 $hash{'5 gallon'}; # also returns "5-GAL" 315 316 my $rehash = Tie::RegexpHash->new(); 317 318 $rehash->add( qr/\d+(\.\d+)?/, "contains a number" ); 319 $rehash->add( qr/s$/, "ends with an \`s\'" ); 320 321 $rehash->match( "foo 123" ); # returns "contains a number" 322 $rehash->match( "examples" ); # returns "ends with an `s'" 323 324=head1 DESCRIPTION 325 326This module allows one to use regular expressions for hash keys, so that 327values can be associated with anything that matches the key. 328 329Hashes can be operated on using the standard tied hash interface in Perl, as 330described in the SYNOPSIS, or using an object-oriented interface described below. 331 332=for readme stop 333 334=head2 Methods 335 336=over 337 338=item new 339 340 my $obj = Tie::RegexpHash->new() 341 342Creates a new "RegexpHash" (Regular Expression Hash) object. 343 344=item add 345 346 $obj->add( $key, $value ); 347 348Adds a new key/value pair to the hash. I<$key> can be a Regexp or a string 349(which is compiled into a Regexp). 350 351If I<$key> is already defined, the value will be changed. If C<$key> matches 352an existing key (but is not the same), a warning will be shown if warnings 353are enabled. 354 355=item match 356 357 $value = $obj->match( $quasikey ); 358 359Returns the value associated with I<$quasikey>. (I<$quasikey> can be a string 360which matches an existing Regexp or an actual Regexp.) Returns 'undef' if 361there is no match. 362 363Regexps are matched in the order they are defined. 364 365=item match_exists 366 367 if ($obj->match_exists( $quasikey )) ... 368 369Returns a true value if there exists a matching key. 370 371=item remove 372 373 $value = $obj->remove( $quasikey ); 374 375Deletes the key associated with I<$quasikey>. If I<$quasikey> matches 376an existing key (but is not the same), a warning will be shown. 377 378Returns the value associated with the key. 379 380=item clear 381 382 $obj->clear(); 383 384Removes all key/value pairs. 385 386=back 387 388=for readme continue 389 390=begin readme 391 392=head1 REVISION HISTORY 393 394A brief list of changes since the previous release: 395 396=for readme include file="Changes" start="0.17" stop="0.14" type="text" 397 398For a detailed history see the F<Changes> file included in this distribution. 399 400=end readme 401 402=head1 AUTHOR 403 404Robert Rothenberg <rrwo at cpan.org>, previous maintainer. 405 406=head1 MAINTAINER 407 408Alastair McGowan-Douglas <altreus@cpan.org> 409 410=for readme stop 411 412=head2 Acknowledgments 413 414Russell Harrison <rch at cpan.org> for patches adding support 415for serialization. 416 417Simon Hanmer <sch at scubaplus.co.uk> & Bart Vetters <robartes at nirya.eb> 418for pointing out a bug in the logic of the _find() routine in v0.10 419 420=for readme continue 421 422=head1 BUGS 423 424Please report bugs on the 425L<github issues tracker|https://github.com/Altreus/Tie-RegexpHash/issues>. 426Request Tracker tickets will probably go unseen. 427 428=head1 LICENSE 429 430 431Copyright (c) 2001-2002, 2005-2006 Robert Rothenberg. All rights reserved. 432 433Portions Copyright (c) 2014-2015 Alastair McGowan-Douglas. 434 435Portions Copyright (c) 2006 Russell Harrison. All rights reserved. 436 437This program is free software. You can redistribute it under the terms of the 438L<Artistic Licence|http://dev.perl.org/licenses/artistic.html>. 439 440=head1 SEE ALSO 441 442L<Tie::Hash::Regex> is a module with a complementary function. Rather than 443a hash with Regexps as keys that match against fetches, it has standard keys 444that are matched by Regexps in fetches. 445 446L<Regexp::Match::Any> matches many Regexps against a variable. 447 448L<Regexp::Match::List> is similar, but supports callbacks and various 449optimizations. 450 451=cut 452