1package Text::Glob;
2use strict;
3use Exporter;
4use vars qw/$VERSION @ISA @EXPORT_OK
5            $strict_leading_dot $strict_wildcard_slash/;
6$VERSION = '0.11';
7@ISA = 'Exporter';
8@EXPORT_OK = qw( glob_to_regex glob_to_regex_string match_glob );
9
10$strict_leading_dot    = 1;
11$strict_wildcard_slash = 1;
12
13use constant debug => 0;
14
15sub glob_to_regex {
16    my $glob = shift;
17    my $regex = glob_to_regex_string($glob);
18    return qr/^$regex$/;
19}
20
21sub glob_to_regex_string
22{
23    my $glob = shift;
24
25    my $seperator = $Text::Glob::seperator;
26    $seperator = "/" unless defined $seperator;
27    $seperator = quotemeta($seperator);
28
29    my ($regex, $in_curlies, $escaping);
30    local $_;
31    my $first_byte = 1;
32    for ($glob =~ m/(.)/gs) {
33        if ($first_byte) {
34            if ($strict_leading_dot) {
35                $regex .= '(?=[^\.])' unless $_ eq '.';
36            }
37            $first_byte = 0;
38        }
39        if ($_ eq '/') {
40            $first_byte = 1;
41        }
42        if ($_ eq '.' || $_ eq '(' || $_ eq ')' || $_ eq '|' ||
43            $_ eq '+' || $_ eq '^' || $_ eq '$' || $_ eq '@' || $_ eq '%' ) {
44            $regex .= "\\$_";
45        }
46        elsif ($_ eq '*') {
47            $regex .= $escaping ? "\\*" :
48              $strict_wildcard_slash ? "(?:(?!$seperator).)*" : ".*";
49        }
50        elsif ($_ eq '?') {
51            $regex .= $escaping ? "\\?" :
52              $strict_wildcard_slash ? "(?!$seperator)." : ".";
53        }
54        elsif ($_ eq '{') {
55            $regex .= $escaping ? "\\{" : "(";
56            ++$in_curlies unless $escaping;
57        }
58        elsif ($_ eq '}' && $in_curlies) {
59            $regex .= $escaping ? "}" : ")";
60            --$in_curlies unless $escaping;
61        }
62        elsif ($_ eq ',' && $in_curlies) {
63            $regex .= $escaping ? "," : "|";
64        }
65        elsif ($_ eq "\\") {
66            if ($escaping) {
67                $regex .= "\\\\";
68                $escaping = 0;
69            }
70            else {
71                $escaping = 1;
72            }
73            next;
74        }
75        else {
76            $regex .= $_;
77            $escaping = 0;
78        }
79        $escaping = 0;
80    }
81    print "# $glob $regex\n" if debug;
82
83    return $regex;
84}
85
86sub match_glob {
87    print "# ", join(', ', map { "'$_'" } @_), "\n" if debug;
88    my $glob = shift;
89    my $regex = glob_to_regex $glob;
90    local $_;
91    grep { $_ =~ $regex } @_;
92}
93
941;
95__END__
96
97=head1 NAME
98
99Text::Glob - match globbing patterns against text
100
101=head1 SYNOPSIS
102
103 use Text::Glob qw( match_glob glob_to_regex );
104
105 print "matched\n" if match_glob( "foo.*", "foo.bar" );
106
107 # prints foo.bar and foo.baz
108 my $regex = glob_to_regex( "foo.*" );
109 for ( qw( foo.bar foo.baz foo bar ) ) {
110     print "matched: $_\n" if /$regex/;
111 }
112
113=head1 DESCRIPTION
114
115Text::Glob implements glob(3) style matching that can be used to match
116against text, rather than fetching names from a filesystem.  If you
117want to do full file globbing use the File::Glob module instead.
118
119=head2 Routines
120
121=over
122
123=item match_glob( $glob, @things_to_test )
124
125Returns the list of things which match the glob from the source list.
126
127=item glob_to_regex( $glob )
128
129Returns a compiled regex which is the equivalent of the globbing
130pattern.
131
132=item glob_to_regex_string( $glob )
133
134Returns a regex string which is the equivalent of the globbing
135pattern.
136
137=back
138
139=head1 SYNTAX
140
141The following metacharacters and rules are respected.
142
143=over
144
145=item C<*> - match zero or more characters
146
147C<a*> matches C<a>, C<aa>, C<aaaa> and many many more.
148
149=item C<?> - match exactly one character
150
151C<a?> matches C<aa>, but not C<a>, or C<aaa>
152
153=item Character sets/ranges
154
155C<example.[ch]> matches C<example.c> and C<example.h>
156
157C<demo.[a-c]> matches C<demo.a>, C<demo.b>, and C<demo.c>
158
159=item alternation
160
161C<example.{foo,bar,baz}> matches C<example.foo>, C<example.bar>, and
162C<example.baz>
163
164=item leading . must be explicitly matched
165
166C<*.foo> does not match C<.bar.foo>.  For this you must either specify
167the leading . in the glob pattern (C<.*.foo>), or set
168C<$Text::Glob::strict_leading_dot> to a false value while compiling
169the regex.
170
171=item C<*> and C<?> do not match the seperator (i.e. do not match C</>)
172
173C<*.foo> does not match C<bar/baz.foo>.  For this you must either
174explicitly match the / in the glob (C<*/*.foo>), or set
175C<$Text::Glob::strict_wildcard_slash> to a false value while compiling
176the regex, or change the seperator that Text::Glob uses by setting
177C<$Text::Glob::seperator> to an alternative value while compiling the
178the regex.
179
180=back
181
182=head1 BUGS
183
184The code uses qr// to produce compiled regexes, therefore this module
185requires perl version 5.005_03 or newer.
186
187=head1 AUTHOR
188
189Richard Clamp <richardc@unixbeard.net>
190
191=head1 COPYRIGHT
192
193Copyright (C) 2002, 2003, 2006, 2007 Richard Clamp.  All Rights Reserved.
194
195This module is free software; you can redistribute it and/or modify it
196under the same terms as Perl itself.
197
198=head1 SEE ALSO
199
200L<File::Glob>, glob(3)
201
202=cut
203