1package Tie::RegexpHash;
2
3require 5.005;
4use strict;
5
6use vars qw( $VERSION @ISA );
7
8$VERSION = '0.17';
9
10use Carp;
11use Data::Dumper;
12
13# This is what stringified qrs seem to look like.
14# It captures flags in $1 and pattern in $2
15my $SERIALIZE_RE;
16
17# To try to keep this working as far back as 5.5 we're using $]
18if ($] < 5.013_006) {
19    $SERIALIZE_RE = qr/^\(\?([ismx]{0,4})-[ismx]*:(.*)\)$/;
20}
21else {
22    $SERIALIZE_RE = qr/^\(\?\^([ismx]{0,4}(?:-[ismx]{1,4})?):(.*)\)$/;
23}
24
25# This is what the serialized version looks like.
26# It also captures flags in $1 and pattern in $2
27my $DESERIALIZE_RE = qr/^([ismx]{0,4}):(.*)$/;
28
29# Creates a new 'Tie::RegexpHash' object. We use an underlying array rather
30# than a hash because we want to search through the hash keys in the order
31# that they were added.
32#
33# See the _find() and add() routines for more details.
34sub new {
35    my ($class) = @_;
36
37    my $self = {
38      KEYS   => [ ], # array of Regexp keys
39      VALUES => [ ], # array of corresponding values
40      COUNT  => 0,   # the number of hash/key pairs (is this necessary?)
41    };
42
43    bless $self, $class;
44}
45
46# Embed any modifiers used with qr// in the pattern.
47sub _convert_key {
48    my ($key) = shift;
49
50    my ($flags,$pat) = ($key =~ $SERIALIZE_RE);
51    ($key = qr/(?$flags:$pat)/) if $flags;
52    return $key;
53}
54
55# Sequentially goes through the hash keys for Regexps which match the given
56# key and returns the index. If the hash is empty, or a matching key was not
57# found, returns undef.
58sub _find {
59    my ($self, $key) = @_;
60
61    unless ($self->{COUNT}) {
62        return;
63    }
64
65    if (ref($key) eq 'Regexp') {
66        my $i = 0;
67        $key = _convert_key($key);
68        while (($i < $self->{COUNT}) and ($key ne $self->{KEYS}->[ $i ])) {
69            $i++;
70        }
71
72        if ($i == $self->{COUNT}) {
73            return;
74        }
75        else {
76            return $i;
77        }
78    }
79    else {
80        my $i = 0;
81        while (($i < $self->{COUNT}) and ($key !~ m/$self->{KEYS}->[ $i ]/)) {
82            $i++;
83        }
84
85        if ($i == $self->{COUNT}) {
86            return;
87        }
88        else {
89            return $i;
90        }
91    }
92}
93
94# If a key exists the value will be replaced. (If the Regexps are not the same
95# but match, a warning is displayed.) If the key is new, then a new key/value
96# pair is added.
97sub add {
98    my ($self, $key, $value) = @_;
99
100    ($key = _convert_key($key)) if (ref($key) eq 'Regexp');
101
102    my $index = _find $self, $key;
103    if (defined($index)) {
104        if ($key ne $self->{KEYS}->[ $index ]) {
105            carp "\'$key\' is not the same as \'",
106                  $self->{KEYS}->[$index], "\'";
107        }
108        $self->{VALUES}->[ $index ] = $value;
109    }
110    else {
111        $index = $self->{COUNT}++;
112
113        ($key = qr/$key/) unless (ref($key) eq 'Regexp');
114
115        $self->{KEYS}->[ $index ]   = $key;
116        $self->{VALUES}->[ $index ] = $value;
117    }
118}
119
120
121# Does a key exist or does it match any Regexp keys?
122sub match_exists {
123    my ($self, $key) = @_;
124    return defined( _find $self, $key );
125}
126
127# Returns the value of a key or any matches to Regexp keys.
128sub match {
129    my ($self, $key) = @_;
130
131    my $index = _find $self, $key;
132
133    if (defined($index)) {
134        return $self->{VALUES}->[ $index ];
135    }
136    else {
137        return;
138    }
139}
140
141# Removes a key or Regexp key and associated value from the hash. If the key
142# is not the same as the Regexp, a warning is displayed.
143sub remove {
144    my ($self, $key) = @_;
145
146    ($key = _convert_key($key)) if (ref($key) eq 'Regexp');
147
148    my $index = _find $self, $key;
149
150    if (defined($index)) {
151        if ($key ne $self->{KEYS}->[ $index ]) {
152            carp "'`$key\' is not the same as '`",
153              $self->{KEYS}->[$index], "\'";
154        }
155
156        my $value = $self->{VALUES}->[ $index ];
157        splice @{ $self->{KEYS} },   $index, 1;
158        splice @{ $self->{VALUES} }, $index, 1;
159        $self->{COUNT}--;
160        return $value;
161    }
162    else {
163        carp "Cannot delete a nonexistent key: \`$key\'";
164        return;
165    }
166}
167
168# Clears the hash.
169sub clear {
170    my ($self) = @_;
171
172    $self->{KEYS}   = [ ];
173    $self->{VALUES} = [ ];
174    $self->{COUNT}  = 0;
175
176}
177
178BEGIN {
179    # make aliases...
180    no strict;
181    *TIEHASH = \ &new;
182    *STORE   = \ &add;
183    *EXISTS  = \ &match_exists;
184    *FETCH   = \ &match;
185    *DELETE  = \ &remove;
186    *CLEAR   = \ &clear;
187}
188
189# Returns the first key
190sub FIRSTKEY {
191    my ($self) = @_;
192
193    unless ($self->{COUNT}) {
194        return;
195    }
196
197    return $self->{KEYS}->[0];
198
199}
200
201# Returns the next key
202sub NEXTKEY {
203    my ($self, $lastkey) = @_;
204
205    unless ($self->{COUNT}) {
206        return;
207    }
208
209    my $index = _find $self, $lastkey;
210
211    unless (defined($index)) {
212        confess "Invalid \$lastkey";
213    }
214
215    $index++;
216
217    if ($index == $self->{COUNT}) {
218        return;
219    }
220    else {
221        return $self->{KEYS}->[ $index ];
222    }
223}
224
225# serialize object
226sub STORABLE_freeze {
227    my ($self, $cloning) = @_;
228
229    my @keystrings;
230
231    {
232        local *_;
233        @keystrings = map { join(':', ($_ =~ $SERIALIZE_RE)); } @{$self->{KEYS}};
234    }
235
236    my $sref = {
237        KEYSTRINGS => \@keystrings,
238        VALUES     => $self->{VALUES},
239        COUNT      => $self->{COUNT},
240    };
241
242    return (0,$sref);
243}
244
245# deserialize
246sub STORABLE_thaw {
247    my($self, $cloning, $serialized, $sref) = @_;
248
249    $self->{KEYS}   = [ ];
250    $self->{VALUES} = $sref->{VALUES};
251    $self->{COUNT}  = $sref->{COUNT};
252
253    {
254        local *_;
255        @{$self->{KEYS}} = map {
256             my ($flags,$pat) = ($_ =~ $DESERIALIZE_RE);
257             $pat = ($flags) ? "(?$flags:$pat)" : $pat;
258             qr/$pat/;
259        } @{$sref->{KEYSTRINGS}};
260    }
261}
262
2631;
264__END__
265
266=head1 NAME
267
268Tie::RegexpHash - Use regular expressions as hash keys
269
270=begin readme
271
272=head1 REQUIREMENTS
273
274L<Tie::RegexpHash> is written for and tested on Perl 5.14.0, but should run as
275far back as Perl 5.005. (Because it uses Regexp C<qr//> variables it cannot run
276on earlier versions of Perl.)
277
278It uses only standard modules. Serialization is supported through Storable, but
279Storable is not required for normal operation.
280
281=head2 Installation
282
283Installation can be done using the traditional Makefile.PL or the newer Build.PL
284methods.
285
286Using Makefile.PL:
287
288  perl Makefile.PL
289  make test
290  make install
291
292(On Windows platforms you should use C<nmake> instead.)
293
294Using Build.PL (if you have Module::Build installed):
295
296  perl Build.PL
297  perl Build test
298  perl Build install
299
300=end readme
301
302=head1 SYNOPSIS
303
304  use Tie::RegexpHash;
305
306  my %hash;
307
308  tie %hash, 'Tie::RegexpHash';
309
310  $hash{ qr/^5(\s+|-)?gal(\.|lons?)?/i } = '5-GAL';
311
312  $hash{'5 gal'};     # returns "5-GAL"
313  $hash{'5GAL'};      # returns "5-GAL"
314  $hash{'5  gallon'}; # also returns "5-GAL"
315
316  my $rehash = Tie::RegexpHash->new();
317
318  $rehash->add( qr/\d+(\.\d+)?/, "contains a number" );
319  $rehash->add( qr/s$/,          "ends with an \`s\'" );
320
321  $rehash->match( "foo 123" );  # returns "contains a number"
322  $rehash->match( "examples" ); # returns "ends with an `s'"
323
324=head1 DESCRIPTION
325
326This module allows one to use regular expressions for hash keys, so that
327values can be associated with anything that matches the key.
328
329Hashes can be operated on using the standard tied hash interface in Perl, as
330described in the SYNOPSIS, or using an object-oriented interface described below.
331
332=for readme stop
333
334=head2 Methods
335
336=over
337
338=item new
339
340  my $obj = Tie::RegexpHash->new()
341
342Creates a new "RegexpHash" (Regular Expression Hash) object.
343
344=item add
345
346  $obj->add( $key, $value );
347
348Adds a new key/value pair to the hash. I<$key> can be a Regexp or a string
349(which is compiled into a Regexp).
350
351If I<$key> is already defined, the value will be changed. If C<$key> matches
352an existing key (but is not the same), a warning will be shown if warnings
353are enabled.
354
355=item match
356
357  $value = $obj->match( $quasikey );
358
359Returns the value associated with I<$quasikey>. (I<$quasikey> can be a string
360which matches an existing Regexp or an actual Regexp.)  Returns 'undef' if
361there is no match.
362
363Regexps are matched in the order they are defined.
364
365=item match_exists
366
367  if ($obj->match_exists( $quasikey )) ...
368
369Returns a true value if there exists a matching key.
370
371=item remove
372
373  $value = $obj->remove( $quasikey );
374
375Deletes the key associated with I<$quasikey>.  If I<$quasikey> matches
376an existing key (but is not the same), a warning will be shown.
377
378Returns the value associated with the key.
379
380=item clear
381
382  $obj->clear();
383
384Removes all key/value pairs.
385
386=back
387
388=for readme continue
389
390=begin readme
391
392=head1 REVISION HISTORY
393
394A brief list of changes since the previous release:
395
396=for readme include file="Changes" start="0.17" stop="0.14" type="text"
397
398For a detailed history see the F<Changes> file included in this distribution.
399
400=end readme
401
402=head1 AUTHOR
403
404Robert Rothenberg <rrwo at cpan.org>, previous maintainer.
405
406=head1 MAINTAINER
407
408Alastair McGowan-Douglas <altreus@cpan.org>
409
410=for readme stop
411
412=head2 Acknowledgments
413
414Russell Harrison <rch at cpan.org> for patches adding support
415for serialization.
416
417Simon Hanmer <sch at scubaplus.co.uk> & Bart Vetters <robartes at nirya.eb>
418for pointing out a bug in the logic of the _find() routine in v0.10
419
420=for readme continue
421
422=head1 BUGS
423
424Please report bugs on the
425L<github issues tracker|https://github.com/Altreus/Tie-RegexpHash/issues>.
426Request Tracker tickets will probably go unseen.
427
428=head1 LICENSE
429
430
431Copyright (c) 2001-2002, 2005-2006 Robert Rothenberg. All rights reserved.
432
433Portions Copyright (c) 2014-2015 Alastair McGowan-Douglas.
434
435Portions Copyright (c) 2006 Russell Harrison. All rights reserved.
436
437This program is free software. You can redistribute it under the terms of the
438L<Artistic Licence|http://dev.perl.org/licenses/artistic.html>.
439
440=head1 SEE ALSO
441
442L<Tie::Hash::Regex> is a module with a complementary function. Rather than
443a hash with Regexps as keys that match against fetches, it has standard keys
444that are matched by Regexps in fetches.
445
446L<Regexp::Match::Any> matches many Regexps against a variable.
447
448L<Regexp::Match::List> is similar, but supports callbacks and various
449optimizations.
450
451=cut
452