1b39c5158Smillertpackage Filter::Simple;
2b39c5158Smillert
3b39c5158Smillertuse Text::Balanced ':ALL';
4b39c5158Smillert
5*56d68f1eSafresh1our $VERSION = '0.96';
6b39c5158Smillert
7b39c5158Smillertuse Filter::Util::Call;
8b39c5158Smillertuse Carp;
9b39c5158Smillert
109f11ffb7Safresh1our @EXPORT = qw( FILTER FILTER_ONLY );
11b39c5158Smillert
12b39c5158Smillert
13b39c5158Smillertsub import {
14b39c5158Smillert    if (@_>1) { shift; goto &FILTER }
15b39c5158Smillert    else      { *{caller()."::$_"} = \&$_ foreach @EXPORT }
16b39c5158Smillert}
17b39c5158Smillert
18b39c5158Smillertsub fail {
19b39c5158Smillert    croak "FILTER_ONLY: ", @_;
20b39c5158Smillert}
21b39c5158Smillert
22b39c5158Smillertmy $exql = sub {
23b39c5158Smillert    my @bits = extract_quotelike $_[0], qr//;
24b39c5158Smillert    return unless $bits[0];
25b39c5158Smillert    return \@bits;
26b39c5158Smillert};
27b39c5158Smillert
28b39c5158Smillertmy $ncws = qr/\s+/;
29b39c5158Smillertmy $comment = qr/(?<![\$\@%])#.*/;
30b39c5158Smillertmy $ws = qr/(?:$ncws|$comment)+/;
31b39c5158Smillertmy $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
32b39c5158Smillertmy $EOP = qr/\n\n|\Z/;
33b39c5158Smillertmy $CUT = qr/\n=cut.*$EOP/;
34b39c5158Smillertmy $pod_or_DATA = qr/
35b39c5158Smillert              ^=(?:head[1-4]|item) .*? $CUT
36b39c5158Smillert            | ^=pod .*? $CUT
37898184e3Ssthen            | ^=for .*? $CUT
38898184e3Ssthen            | ^=begin .*? $CUT
39b39c5158Smillert            | ^__(DATA|END)__\r?\n.*
40b39c5158Smillert            /smx;
41898184e3Ssthenmy $variable = qr{
42898184e3Ssthen        [\$*\@%]\s*
43898184e3Ssthen            \{\s*(?!::)(?:\d+|[][&`'#+*./|,";%=~:?!\@<>()-]|\^[A-Z]?)\}
44898184e3Ssthen      | (?:\$#?|[*\@\%]|\\&)\$*\s*
45898184e3Ssthen               (?:  \{\s*(?:\^(?=[A-Z_]))?(?:\w|::|'\w)*\s*\}
46898184e3Ssthen                  |      (?:\^(?=[A-Z_]))?(?:\w|::|'\w)*
47898184e3Ssthen                  | (?=\{)  # ${ block }
48898184e3Ssthen               )
49898184e3Ssthen        )
50898184e3Ssthen      | \$\s*(?!::)(?:\d+|[][&`'#+*./|,";%=~:?!\@<>()-]|\^[A-Z]?)
51898184e3Ssthen   }x;
52b39c5158Smillert
53b39c5158Smillertmy %extractor_for = (
54898184e3Ssthen    quotelike  => [ $ws,  $variable, $id, { MATCH  => \&extract_quotelike } ],
55b39c5158Smillert    regex      => [ $ws,  $pod_or_DATA, $id, $exql           ],
56b39c5158Smillert    string     => [ $ws,  $pod_or_DATA, $id, $exql           ],
57898184e3Ssthen    code       => [ $ws, { DONT_MATCH => $pod_or_DATA }, $variable,
58b39c5158Smillert                    $id, { DONT_MATCH => \&extract_quotelike }   ],
59b39c5158Smillert    code_no_comments
60b39c5158Smillert               => [ { DONT_MATCH => $comment },
61898184e3Ssthen                    $ncws, { DONT_MATCH => $pod_or_DATA }, $variable,
62b39c5158Smillert                    $id, { DONT_MATCH => \&extract_quotelike }   ],
63b39c5158Smillert    executable => [ $ws, { DONT_MATCH => $pod_or_DATA }      ],
64b39c5158Smillert    executable_no_comments
65b39c5158Smillert               => [ { DONT_MATCH => $comment },
66b39c5158Smillert                    $ncws, { DONT_MATCH => $pod_or_DATA }      ],
67b39c5158Smillert    all        => [        { MATCH  => qr/(?s:.*)/         } ],
68b39c5158Smillert);
69b39c5158Smillert
70b39c5158Smillertmy %selector_for = (
71b39c5158Smillert    all   => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
72b39c5158Smillert    executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
73*56d68f1eSafresh1    executable_no_comments=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
74b39c5158Smillert    quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
75b39c5158Smillert    regex     => sub { my ($t)=@_;
76b39c5158Smillert               sub{ref() or return $_;
77b39c5158Smillert                   my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
78b39c5158Smillert                   return $_->[0] unless $op =~ /^(qr|m|s)/
79b39c5158Smillert                         || !$op && ($ld eq '/' || $ld eq '?');
80b39c5158Smillert                   $_ = $pat;
81b39c5158Smillert                   $t->(@_);
82b39c5158Smillert                   $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
83b39c5158Smillert                   return "$pre$ql";
84b39c5158Smillert                  };
85b39c5158Smillert            },
86b39c5158Smillert    string     => sub { my ($t)=@_;
87b39c5158Smillert               sub{ref() or return $_;
88b39c5158Smillert                   local *args = \@_;
89b39c5158Smillert                   my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
90b39c5158Smillert                   return $_->[0] if $op =~ /^(qr|m)/
91b39c5158Smillert                         || !$op && ($ld1 eq '/' || $ld1 eq '?');
92b39c5158Smillert                   if (!$op || $op eq 'tr' || $op eq 'y') {
93b39c5158Smillert                       local *_ = \$str1;
94b39c5158Smillert                       $t->(@args);
95b39c5158Smillert                   }
96b39c5158Smillert                   if ($op =~ /^(tr|y|s)/) {
97b39c5158Smillert                       local *_ = \$str2;
98b39c5158Smillert                       $t->(@args);
99b39c5158Smillert                   }
100b39c5158Smillert                   my $result = "$pre$op$ld1$str1$rd1";
101b39c5158Smillert                   $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
102b39c5158Smillert                   $result .= "$str2$rd2$flg";
103b39c5158Smillert                   return $result;
104b39c5158Smillert                  };
105b39c5158Smillert              },
106b39c5158Smillert);
107b39c5158Smillert
108b39c5158Smillert
109b39c5158Smillertsub gen_std_filter_for {
110b39c5158Smillert    my ($type, $transform) = @_;
111b39c5158Smillert    return sub {
112b39c5158Smillert        my $instr;
113b39c5158Smillert        local @components;
114b39c5158Smillert		for (extract_multiple($_,$extractor_for{$type})) {
115b39c5158Smillert            if (ref())     { push @components, $_; $instr=0 }
116b39c5158Smillert            elsif ($instr) { $components[-1] .= $_ }
117b39c5158Smillert            else           { push @components, $_; $instr=1 }
118b39c5158Smillert        }
119b39c5158Smillert        if ($type =~ /^code/) {
120b39c5158Smillert            my $count = 0;
121b8851fccSafresh1            local $placeholder = qr/\Q$;\E(.{4})\Q$;\E/s;
122b8851fccSafresh1            my $extractor =      qr/\Q$;\E(.{4})\Q$;\E/s;
123b39c5158Smillert            $_ = join "",
124b39c5158Smillert                  map { ref $_ ? $;.pack('N',$count++).$; : $_ }
125b39c5158Smillert                      @components;
126b39c5158Smillert            @components = grep { ref $_ } @components;
127b39c5158Smillert            $transform->(@_);
128b39c5158Smillert            s/$extractor/${$components[unpack('N',$1)]}/g;
129b39c5158Smillert        }
130b39c5158Smillert        else {
131b39c5158Smillert            my $selector = $selector_for{$type}->($transform);
132b39c5158Smillert            $_ = join "", map $selector->(@_), @components;
133b39c5158Smillert        }
134b39c5158Smillert    }
135b39c5158Smillert};
136b39c5158Smillert
137b39c5158Smillertsub FILTER (&;$) {
138b39c5158Smillert    my $caller = caller;
139b39c5158Smillert    my ($filter, $terminator) = @_;
140b39c5158Smillert    no warnings 'redefine';
141b39c5158Smillert    *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
142b39c5158Smillert    *{"${caller}::unimport"} = gen_filter_unimport($caller);
143b39c5158Smillert}
144b39c5158Smillert
145b39c5158Smillertsub FILTER_ONLY {
146b39c5158Smillert    my $caller = caller;
147b39c5158Smillert    while (@_ > 1) {
148b39c5158Smillert        my ($what, $how) = splice(@_, 0, 2);
149b39c5158Smillert        fail "Unknown selector: $what"
150b39c5158Smillert            unless exists $extractor_for{$what};
151b39c5158Smillert        fail "Filter for $what is not a subroutine reference"
152b39c5158Smillert            unless ref $how eq 'CODE';
153b39c5158Smillert        push @transforms, gen_std_filter_for($what,$how);
154b39c5158Smillert    }
155b39c5158Smillert    my $terminator = shift;
156b39c5158Smillert
157b39c5158Smillert    my $multitransform = sub {
158b39c5158Smillert        foreach my $transform ( @transforms ) {
159b39c5158Smillert            $transform->(@_);
160b39c5158Smillert        }
161b39c5158Smillert    };
162b39c5158Smillert    no warnings 'redefine';
163b39c5158Smillert    *{"${caller}::import"} =
164b39c5158Smillert        gen_filter_import($caller,$multitransform,$terminator);
165b39c5158Smillert    *{"${caller}::unimport"} = gen_filter_unimport($caller);
166b39c5158Smillert}
167b39c5158Smillert
168b39c5158Smillertmy $ows    = qr/(?:[ \t]+|#[^\n]*)*/;
169b39c5158Smillert
170b39c5158Smillertsub gen_filter_import {
171b39c5158Smillert    my ($class, $filter, $terminator) = @_;
172b39c5158Smillert    my %terminator;
173b39c5158Smillert    my $prev_import = *{$class."::import"}{CODE};
174b39c5158Smillert    return sub {
175b39c5158Smillert        my ($imported_class, @args) = @_;
176b39c5158Smillert        my $def_terminator =
177b39c5158Smillert            qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
178b39c5158Smillert        if (!defined $terminator) {
179b39c5158Smillert            $terminator{terminator} = $def_terminator;
180b39c5158Smillert        }
181b39c5158Smillert        elsif (!ref $terminator || ref $terminator eq 'Regexp') {
182b39c5158Smillert            $terminator{terminator} = $terminator;
183b39c5158Smillert        }
184b39c5158Smillert        elsif (ref $terminator ne 'HASH') {
185b39c5158Smillert            croak "Terminator must be specified as scalar or hash ref"
186b39c5158Smillert        }
187b39c5158Smillert        elsif (!exists $terminator->{terminator}) {
188b39c5158Smillert            $terminator{terminator} = $def_terminator;
189b39c5158Smillert        }
190b39c5158Smillert        filter_add(
191b39c5158Smillert            sub {
192b39c5158Smillert                my ($status, $lastline);
193b39c5158Smillert                my $count = 0;
194b39c5158Smillert                my $data = "";
195b39c5158Smillert                while ($status = filter_read()) {
196b39c5158Smillert                    return $status if $status < 0;
197b39c5158Smillert                    if ($terminator{terminator} &&
198b39c5158Smillert                        m/$terminator{terminator}/) {
199b39c5158Smillert                        $lastline = $_;
2009f11ffb7Safresh1                        $count++;
201b39c5158Smillert                        last;
202b39c5158Smillert                    }
203b39c5158Smillert                    $data .= $_;
204b39c5158Smillert                    $count++;
205b39c5158Smillert                    $_ = "";
206b39c5158Smillert                }
207b39c5158Smillert                return $count if not $count;
208b39c5158Smillert                $_ = $data;
209b39c5158Smillert                $filter->($imported_class, @args) unless $status < 0;
210b39c5158Smillert                if (defined $lastline) {
211b39c5158Smillert                    if (defined $terminator{becomes}) {
212b39c5158Smillert                        $_ .= $terminator{becomes};
213b39c5158Smillert                    }
214b39c5158Smillert                    elsif ($lastline =~ $def_terminator) {
215b39c5158Smillert                        $_ .= $lastline;
216b39c5158Smillert                    }
217b39c5158Smillert                }
218b39c5158Smillert                return $count;
219b39c5158Smillert            }
220b39c5158Smillert        );
221b39c5158Smillert        if ($prev_import) {
222b39c5158Smillert            goto &$prev_import;
223b39c5158Smillert        }
224b39c5158Smillert        elsif ($class->isa('Exporter')) {
225b39c5158Smillert            $class->export_to_level(1,@_);
226b39c5158Smillert        }
227b39c5158Smillert    }
228b39c5158Smillert}
229b39c5158Smillert
230b39c5158Smillertsub gen_filter_unimport {
231b39c5158Smillert    my ($class) = @_;
232b39c5158Smillert    return sub {
233b39c5158Smillert        filter_del();
234b39c5158Smillert        goto &$prev_unimport if $prev_unimport;
235b39c5158Smillert    }
236b39c5158Smillert}
237b39c5158Smillert
238b39c5158Smillert1;
239b39c5158Smillert
240b39c5158Smillert__END__
241b39c5158Smillert
242b39c5158Smillert=head1 NAME
243b39c5158Smillert
244b39c5158SmillertFilter::Simple - Simplified source filtering
245b39c5158Smillert
246b39c5158Smillert=head1 SYNOPSIS
247b39c5158Smillert
248b39c5158Smillert # in MyFilter.pm:
249b39c5158Smillert
250b39c5158Smillert     package MyFilter;
251b39c5158Smillert
252b39c5158Smillert     use Filter::Simple;
253b39c5158Smillert
254b39c5158Smillert     FILTER { ... };
255b39c5158Smillert
256b39c5158Smillert     # or just:
257b39c5158Smillert     #
258b39c5158Smillert     # use Filter::Simple sub { ... };
259b39c5158Smillert
260b39c5158Smillert # in user's code:
261b39c5158Smillert
262b39c5158Smillert     use MyFilter;
263b39c5158Smillert
264b39c5158Smillert     # this code is filtered
265b39c5158Smillert
266b39c5158Smillert     no MyFilter;
267b39c5158Smillert
268b39c5158Smillert     # this code is not
269b39c5158Smillert
270b39c5158Smillert
271b39c5158Smillert=head1 DESCRIPTION
272b39c5158Smillert
273b39c5158Smillert=head2 The Problem
274b39c5158Smillert
275b39c5158SmillertSource filtering is an immensely powerful feature of recent versions of Perl.
276b39c5158SmillertIt allows one to extend the language itself (e.g. the Switch module), to
277b39c5158Smillertsimplify the language (e.g. Language::Pythonesque), or to completely recast the
278b39c5158Smillertlanguage (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
279b39c5158Smillertthe full power of Perl as its own, recursively applied, macro language.
280b39c5158Smillert
281b39c5158SmillertThe excellent Filter::Util::Call module (by Paul Marquess) provides a
282b39c5158Smillertusable Perl interface to source filtering, but it is often too powerful
283b39c5158Smillertand not nearly as simple as it could be.
284b39c5158Smillert
285b39c5158SmillertTo use the module it is necessary to do the following:
286b39c5158Smillert
287b39c5158Smillert=over 4
288b39c5158Smillert
289b39c5158Smillert=item 1.
290b39c5158Smillert
291b39c5158SmillertDownload, build, and install the Filter::Util::Call module.
292b39c5158Smillert(If you have Perl 5.7.1 or later, this is already done for you.)
293b39c5158Smillert
294b39c5158Smillert=item 2.
295b39c5158Smillert
296b39c5158SmillertSet up a module that does a C<use Filter::Util::Call>.
297b39c5158Smillert
298b39c5158Smillert=item 3.
299b39c5158Smillert
300b39c5158SmillertWithin that module, create an C<import> subroutine.
301b39c5158Smillert
302b39c5158Smillert=item 4.
303b39c5158Smillert
304b39c5158SmillertWithin the C<import> subroutine do a call to C<filter_add>, passing
305b39c5158Smillertit either a subroutine reference.
306b39c5158Smillert
307b39c5158Smillert=item 5.
308b39c5158Smillert
309b39c5158SmillertWithin the subroutine reference, call C<filter_read> or C<filter_read_exact>
310b39c5158Smillertto "prime" $_ with source code data from the source file that will
311b39c5158SmillertC<use> your module. Check the status value returned to see if any
312b39c5158Smillertsource code was actually read in.
313b39c5158Smillert
314b39c5158Smillert=item 6.
315b39c5158Smillert
316b39c5158SmillertProcess the contents of $_ to change the source code in the desired manner.
317b39c5158Smillert
318b39c5158Smillert=item 7.
319b39c5158Smillert
320b39c5158SmillertReturn the status value.
321b39c5158Smillert
322b39c5158Smillert=item 8.
323b39c5158Smillert
324b39c5158SmillertIf the act of unimporting your module (via a C<no>) should cause source
325b39c5158Smillertcode filtering to cease, create an C<unimport> subroutine, and have it call
326b39c5158SmillertC<filter_del>. Make sure that the call to C<filter_read> or
327b39c5158SmillertC<filter_read_exact> in step 5 will not accidentally read past the
328b39c5158SmillertC<no>. Effectively this limits source code filters to line-by-line
329b39c5158Smillertoperation, unless the C<import> subroutine does some fancy
330b39c5158Smillertpre-pre-parsing of the source code it's filtering.
331b39c5158Smillert
332b39c5158Smillert=back
333b39c5158Smillert
334b39c5158SmillertFor example, here is a minimal source code filter in a module named
335b39c5158SmillertBANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
336b39c5158Smillertto the sequence C<die 'BANG' if $BANG> in any piece of code following a
337b39c5158SmillertC<use BANG;> statement (until the next C<no BANG;> statement, if any):
338b39c5158Smillert
339b39c5158Smillert    package BANG;
340b39c5158Smillert
341b39c5158Smillert    use Filter::Util::Call ;
342b39c5158Smillert
343b39c5158Smillert    sub import {
344b39c5158Smillert        filter_add( sub {
345b39c5158Smillert        my $caller = caller;
346b39c5158Smillert        my ($status, $no_seen, $data);
347b39c5158Smillert        while ($status = filter_read()) {
348b39c5158Smillert            if (/^\s*no\s+$caller\s*;\s*?$/) {
349b39c5158Smillert                $no_seen=1;
350b39c5158Smillert                last;
351b39c5158Smillert            }
352b39c5158Smillert            $data .= $_;
353b39c5158Smillert            $_ = "";
354b39c5158Smillert        }
355b39c5158Smillert        $_ = $data;
356b39c5158Smillert        s/BANG\s+BANG/die 'BANG' if \$BANG/g
357b39c5158Smillert            unless $status < 0;
358b39c5158Smillert        $_ .= "no $class;\n" if $no_seen;
359b39c5158Smillert        return 1;
360b39c5158Smillert        })
361b39c5158Smillert    }
362b39c5158Smillert
363b39c5158Smillert    sub unimport {
364b39c5158Smillert        filter_del();
365b39c5158Smillert    }
366b39c5158Smillert
367b39c5158Smillert    1 ;
368b39c5158Smillert
369b39c5158SmillertThis level of sophistication puts filtering out of the reach of
370b39c5158Smillertmany programmers.
371b39c5158Smillert
372b39c5158Smillert
373b39c5158Smillert=head2 A Solution
374b39c5158Smillert
375b39c5158SmillertThe Filter::Simple module provides a simplified interface to
376b39c5158SmillertFilter::Util::Call; one that is sufficient for most common cases.
377b39c5158Smillert
378b39c5158SmillertInstead of the above process, with Filter::Simple the task of setting up
379b39c5158Smillerta source code filter is reduced to:
380b39c5158Smillert
381b39c5158Smillert=over 4
382b39c5158Smillert
383b39c5158Smillert=item 1.
384b39c5158Smillert
385b39c5158SmillertDownload and install the Filter::Simple module.
386b39c5158Smillert(If you have Perl 5.7.1 or later, this is already done for you.)
387b39c5158Smillert
388b39c5158Smillert=item 2.
389b39c5158Smillert
390b39c5158SmillertSet up a module that does a C<use Filter::Simple> and then
391b39c5158Smillertcalls C<FILTER { ... }>.
392b39c5158Smillert
393b39c5158Smillert=item 3.
394b39c5158Smillert
395b39c5158SmillertWithin the anonymous subroutine or block that is passed to
396b39c5158SmillertC<FILTER>, process the contents of $_ to change the source code in
397b39c5158Smillertthe desired manner.
398b39c5158Smillert
399b39c5158Smillert=back
400b39c5158Smillert
401b39c5158SmillertIn other words, the previous example, would become:
402b39c5158Smillert
403b39c5158Smillert    package BANG;
404b39c5158Smillert    use Filter::Simple;
405b39c5158Smillert
406b39c5158Smillert    FILTER {
407b39c5158Smillert        s/BANG\s+BANG/die 'BANG' if \$BANG/g;
408b39c5158Smillert    };
409b39c5158Smillert
410b39c5158Smillert    1 ;
411b39c5158Smillert
412b39c5158SmillertNote that the source code is passed as a single string, so any regex that
413b39c5158Smillertuses C<^> or C<$> to detect line boundaries will need the C</m> flag.
414b39c5158Smillert
415b39c5158Smillert=head2 Disabling or changing <no> behaviour
416b39c5158Smillert
417b39c5158SmillertBy default, the installed filter only filters up to a line consisting of one of
418b39c5158Smillertthe three standard source "terminators":
419b39c5158Smillert
420b39c5158Smillert    no ModuleName;  # optional comment
421b39c5158Smillert
422b39c5158Smillertor:
423b39c5158Smillert
424b39c5158Smillert    __END__
425b39c5158Smillert
426b39c5158Smillertor:
427b39c5158Smillert
428b39c5158Smillert    __DATA__
429b39c5158Smillert
430b39c5158Smillertbut this can be altered by passing a second argument to C<use Filter::Simple>
431b39c5158Smillertor C<FILTER> (just remember: there's I<no> comma after the initial block when
432b39c5158Smillertyou use C<FILTER>).
433b39c5158Smillert
434b39c5158SmillertThat second argument may be either a C<qr>'d regular expression (which is then
435b39c5158Smillertused to match the terminator line), or a defined false value (which indicates
436b39c5158Smillertthat no terminator line should be looked for), or a reference to a hash
437b39c5158Smillert(in which case the terminator is the value associated with the key
438b39c5158SmillertC<'terminator'>.
439b39c5158Smillert
440b39c5158SmillertFor example, to cause the previous filter to filter only up to a line of the
441b39c5158Smillertform:
442b39c5158Smillert
443b39c5158Smillert    GNAB esu;
444b39c5158Smillert
445b39c5158Smillertyou would write:
446b39c5158Smillert
447b39c5158Smillert    package BANG;
448b39c5158Smillert    use Filter::Simple;
449b39c5158Smillert
450b39c5158Smillert    FILTER {
451b39c5158Smillert        s/BANG\s+BANG/die 'BANG' if \$BANG/g;
452b39c5158Smillert    }
453b39c5158Smillert    qr/^\s*GNAB\s+esu\s*;\s*?$/;
454b39c5158Smillert
455b39c5158Smillertor:
456b39c5158Smillert
457b39c5158Smillert    FILTER {
458b39c5158Smillert        s/BANG\s+BANG/die 'BANG' if \$BANG/g;
459b39c5158Smillert    }
460b39c5158Smillert    { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
461b39c5158Smillert
462b39c5158Smillertand to prevent the filter's being turned off in any way:
463b39c5158Smillert
464b39c5158Smillert    package BANG;
465b39c5158Smillert    use Filter::Simple;
466b39c5158Smillert
467b39c5158Smillert    FILTER {
468b39c5158Smillert        s/BANG\s+BANG/die 'BANG' if \$BANG/g;
469b39c5158Smillert    }
470b39c5158Smillert    "";    # or: 0
471b39c5158Smillert
472b39c5158Smillertor:
473b39c5158Smillert
474b39c5158Smillert    FILTER {
475b39c5158Smillert        s/BANG\s+BANG/die 'BANG' if \$BANG/g;
476b39c5158Smillert    }
477b39c5158Smillert    { terminator => "" };
478b39c5158Smillert
479b39c5158SmillertB<Note that, no matter what you set the terminator pattern to,
480b39c5158Smillertthe actual terminator itself I<must> be contained on a single source line.>
481b39c5158Smillert
482b39c5158Smillert
483b39c5158Smillert=head2 All-in-one interface
484b39c5158Smillert
485b39c5158SmillertSeparating the loading of Filter::Simple:
486b39c5158Smillert
487b39c5158Smillert    use Filter::Simple;
488b39c5158Smillert
489b39c5158Smillertfrom the setting up of the filtering:
490b39c5158Smillert
491b39c5158Smillert    FILTER { ... };
492b39c5158Smillert
493b39c5158Smillertis useful because it allows other code (typically parser support code
494b39c5158Smillertor caching variables) to be defined before the filter is invoked.
495b39c5158SmillertHowever, there is often no need for such a separation.
496b39c5158Smillert
497b39c5158SmillertIn those cases, it is easier to just append the filtering subroutine and
498b39c5158Smillertany terminator specification directly to the C<use> statement that loads
499b39c5158SmillertFilter::Simple, like so:
500b39c5158Smillert
501b39c5158Smillert    use Filter::Simple sub {
502b39c5158Smillert        s/BANG\s+BANG/die 'BANG' if \$BANG/g;
503b39c5158Smillert    };
504b39c5158Smillert
505b39c5158SmillertThis is exactly the same as:
506b39c5158Smillert
507b39c5158Smillert    use Filter::Simple;
508b39c5158Smillert    BEGIN {
509b39c5158Smillert        Filter::Simple::FILTER {
510b39c5158Smillert            s/BANG\s+BANG/die 'BANG' if \$BANG/g;
511b39c5158Smillert        };
512b39c5158Smillert    }
513b39c5158Smillert
514b39c5158Smillertexcept that the C<FILTER> subroutine is not exported by Filter::Simple.
515b39c5158Smillert
516b39c5158Smillert
517b39c5158Smillert=head2 Filtering only specific components of source code
518b39c5158Smillert
519b39c5158SmillertOne of the problems with a filter like:
520b39c5158Smillert
521b39c5158Smillert    use Filter::Simple;
522b39c5158Smillert
523b39c5158Smillert    FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
524b39c5158Smillert
525b39c5158Smillertis that it indiscriminately applies the specified transformation to
526b39c5158Smillertthe entire text of your source program. So something like:
527b39c5158Smillert
528b39c5158Smillert    warn 'BANG BANG, YOU'RE DEAD';
529b39c5158Smillert    BANG BANG;
530b39c5158Smillert
531b39c5158Smillertwill become:
532b39c5158Smillert
533b39c5158Smillert    warn 'die 'BANG' if $BANG, YOU'RE DEAD';
534b39c5158Smillert    die 'BANG' if $BANG;
535b39c5158Smillert
536b39c5158SmillertIt is very common when filtering source to only want to apply the filter
537b39c5158Smillertto the non-character-string parts of the code, or alternatively to I<only>
538b39c5158Smillertthe character strings.
539b39c5158Smillert
540b39c5158SmillertFilter::Simple supports this type of filtering by automatically
541b39c5158Smillertexporting the C<FILTER_ONLY> subroutine.
542b39c5158Smillert
543b39c5158SmillertC<FILTER_ONLY> takes a sequence of specifiers that install separate
544b39c5158Smillert(and possibly multiple) filters that act on only parts of the source code.
545b39c5158SmillertFor example:
546b39c5158Smillert
547b39c5158Smillert    use Filter::Simple;
548b39c5158Smillert
549b39c5158Smillert    FILTER_ONLY
550b39c5158Smillert        code      => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
551b39c5158Smillert        quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g };
552b39c5158Smillert
553b39c5158SmillertThe C<"code"> subroutine will only be used to filter parts of the source
554b39c5158Smillertcode that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
555b39c5158Smillertsubroutine only filters Perl quotelikes (including here documents).
556b39c5158Smillert
557b39c5158SmillertThe full list of alternatives is:
558b39c5158Smillert
559b39c5158Smillert=over
560b39c5158Smillert
561b39c5158Smillert=item C<"code">
562b39c5158Smillert
563b39c5158SmillertFilters only those sections of the source code that are not quotelikes, POD, or
564b39c5158SmillertC<__DATA__>.
565b39c5158Smillert
566b39c5158Smillert=item C<"code_no_comments">
567b39c5158Smillert
568b39c5158SmillertFilters only those sections of the source code that are not quotelikes, POD,
569b39c5158Smillertcomments, or C<__DATA__>.
570b39c5158Smillert
571b39c5158Smillert=item C<"executable">
572b39c5158Smillert
573b39c5158SmillertFilters only those sections of the source code that are not POD or C<__DATA__>.
574b39c5158Smillert
575b39c5158Smillert=item C<"executable_no_comments">
576b39c5158Smillert
577b39c5158SmillertFilters only those sections of the source code that are not POD, comments, or C<__DATA__>.
578b39c5158Smillert
579b39c5158Smillert=item C<"quotelike">
580b39c5158Smillert
581b39c5158SmillertFilters only Perl quotelikes (as interpreted by
582b39c5158SmillertC<&Text::Balanced::extract_quotelike>).
583b39c5158Smillert
584b39c5158Smillert=item C<"string">
585b39c5158Smillert
586b39c5158SmillertFilters only the string literal parts of a Perl quotelike (i.e. the
587b39c5158Smillertcontents of a string literal, either half of a C<tr///>, the second
588b39c5158Smillerthalf of an C<s///>).
589b39c5158Smillert
590b39c5158Smillert=item C<"regex">
591b39c5158Smillert
592b39c5158SmillertFilters only the pattern literal parts of a Perl quotelike (i.e. the
593b39c5158Smillertcontents of a C<qr//> or an C<m//>, the first half of an C<s///>).
594b39c5158Smillert
595b39c5158Smillert=item C<"all">
596b39c5158Smillert
597b39c5158SmillertFilters everything. Identical in effect to C<FILTER>.
598b39c5158Smillert
599b39c5158Smillert=back
600b39c5158Smillert
601b39c5158SmillertExcept for C<< FILTER_ONLY code => sub {...} >>, each of
602b39c5158Smillertthe component filters is called repeatedly, once for each component
603b39c5158Smillertfound in the source code.
604b39c5158Smillert
605b39c5158SmillertNote that you can also apply two or more of the same type of filter in
606b39c5158Smillerta single C<FILTER_ONLY>. For example, here's a simple
607b39c5158Smillertmacro-preprocessor that is only applied within regexes,
608b39c5158Smillertwith a final debugging pass that prints the resulting source code:
609b39c5158Smillert
610b39c5158Smillert    use Regexp::Common;
611b39c5158Smillert    FILTER_ONLY
612b39c5158Smillert        regex => sub { s/!\[/[^/g },
613b39c5158Smillert        regex => sub { s/%d/$RE{num}{int}/g },
614b39c5158Smillert        regex => sub { s/%f/$RE{num}{real}/g },
615b39c5158Smillert        all   => sub { print if $::DEBUG };
616b39c5158Smillert
617b39c5158Smillert
618b39c5158Smillert
619b39c5158Smillert=head2 Filtering only the code parts of source code
620b39c5158Smillert
621b39c5158SmillertMost source code ceases to be grammatically correct when it is broken up
622b39c5158Smillertinto the pieces between string literals and regexes. So the C<'code'>
623b39c5158Smillertand C<'code_no_comments'> component filter behave slightly differently
624b39c5158Smillertfrom the other partial filters described in the previous section.
625b39c5158Smillert
626b39c5158SmillertRather than calling the specified processor on each individual piece of
627b39c5158Smillertcode (i.e. on the bits between quotelikes), the C<'code...'> partial
628b39c5158Smillertfilters operate on the entire source code, but with the quotelike bits
629b39c5158Smillert(and, in the case of C<'code_no_comments'>, the comments) "blanked out".
630b39c5158Smillert
631b39c5158SmillertThat is, a C<'code...'> filter I<replaces> each quoted string, quotelike,
632b39c5158Smillertregex, POD, and __DATA__ section with a placeholder. The
633b39c5158Smillertdelimiters of this placeholder are the contents of the C<$;> variable
634b39c5158Smillertat the time the filter is applied (normally C<"\034">). The remaining
635b39c5158Smillertfour bytes are a unique identifier for the component being replaced.
636b39c5158Smillert
637b39c5158SmillertThis approach makes it comparatively easy to write code preprocessors
638b39c5158Smillertwithout worrying about the form or contents of strings, regexes, etc.
639b39c5158Smillert
640b39c5158SmillertFor convenience, during a C<'code...'> filtering operation, Filter::Simple
641b39c5158Smillertprovides a package variable (C<$Filter::Simple::placeholder>) that
642b39c5158Smillertcontains a pre-compiled regex that matches any placeholder...and
643b39c5158Smillertcaptures the identifier within the placeholder. Placeholders can be
644b39c5158Smillertmoved and re-ordered within the source code as needed.
645b39c5158Smillert
646b39c5158SmillertIn addition, a second package variable (C<@Filter::Simple::components>)
647b39c5158Smillertcontains a list of the various pieces of C<$_>, as they were originally split
648b39c5158Smillertup to allow placeholders to be inserted.
649b39c5158Smillert
650b39c5158SmillertOnce the filtering has been applied, the original strings, regexes, POD,
651b39c5158Smillertetc. are re-inserted into the code, by replacing each placeholder with
652b39c5158Smillertthe corresponding original component (from C<@components>). Note that
653b39c5158Smillertthis means that the C<@components> variable must be treated with extreme
654b39c5158Smillertcare within the filter. The C<@components> array stores the "back-
655b39c5158Smillerttranslations" of each placeholder inserted into C<$_>, as well as the
656b39c5158Smillertinterstitial source code between placeholders. If the placeholder
657b39c5158Smillertbacktranslations are altered in C<@components>, they will be similarly
658b39c5158Smillertchanged when the placeholders are removed from C<$_> after the filter
659b39c5158Smillertis complete.
660b39c5158Smillert
661b39c5158SmillertFor example, the following filter detects concatenated pairs of
662b39c5158Smillertstrings/quotelikes and reverses the order in which they are
663b39c5158Smillertconcatenated:
664b39c5158Smillert
665b39c5158Smillert    package DemoRevCat;
666b39c5158Smillert    use Filter::Simple;
667b39c5158Smillert
668b39c5158Smillert    FILTER_ONLY code => sub {
669b39c5158Smillert        my $ph = $Filter::Simple::placeholder;
670b39c5158Smillert        s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
671b39c5158Smillert    };
672b39c5158Smillert
673b39c5158SmillertThus, the following code:
674b39c5158Smillert
675b39c5158Smillert    use DemoRevCat;
676b39c5158Smillert
677b39c5158Smillert    my $str = "abc" . q(def);
678b39c5158Smillert
679b39c5158Smillert    print "$str\n";
680b39c5158Smillert
681b39c5158Smillertwould become:
682b39c5158Smillert
683b39c5158Smillert    my $str = q(def)."abc";
684b39c5158Smillert
685b39c5158Smillert    print "$str\n";
686b39c5158Smillert
687b39c5158Smillertand hence print:
688b39c5158Smillert
689b39c5158Smillert    defabc
690b39c5158Smillert
691b39c5158Smillert
692b39c5158Smillert=head2 Using Filter::Simple with an explicit C<import> subroutine
693b39c5158Smillert
694b39c5158SmillertFilter::Simple generates a special C<import> subroutine for
695b39c5158Smillertyour module (see L<"How it works">) which would normally replace any
696b39c5158SmillertC<import> subroutine you might have explicitly declared.
697b39c5158Smillert
698b39c5158SmillertHowever, Filter::Simple is smart enough to notice your existing
699b39c5158SmillertC<import> and Do The Right Thing with it.
700b39c5158SmillertThat is, if you explicitly define an C<import> subroutine in a package
701b39c5158Smillertthat's using Filter::Simple, that C<import> subroutine will still
702b39c5158Smillertbe invoked immediately after any filter you install.
703b39c5158Smillert
704b39c5158SmillertThe only thing you have to remember is that the C<import> subroutine
705b39c5158SmillertI<must> be declared I<before> the filter is installed. If you use C<FILTER>
706b39c5158Smillertto install the filter:
707b39c5158Smillert
708b39c5158Smillert    package Filter::TurnItUpTo11;
709b39c5158Smillert
710b39c5158Smillert    use Filter::Simple;
711b39c5158Smillert
712b39c5158Smillert    FILTER { s/(\w+)/\U$1/ };
713b39c5158Smillert
714b39c5158Smillertthat will almost never be a problem, but if you install a filtering
715b39c5158Smillertsubroutine by passing it directly to the C<use Filter::Simple>
716b39c5158Smillertstatement:
717b39c5158Smillert
718b39c5158Smillert    package Filter::TurnItUpTo11;
719b39c5158Smillert
720b39c5158Smillert    use Filter::Simple sub{ s/(\w+)/\U$1/ };
721b39c5158Smillert
722b39c5158Smillertthen you must make sure that your C<import> subroutine appears before
723b39c5158Smillertthat C<use> statement.
724b39c5158Smillert
725b39c5158Smillert
726b39c5158Smillert=head2 Using Filter::Simple and Exporter together
727b39c5158Smillert
728b39c5158SmillertLikewise, Filter::Simple is also smart enough
729b39c5158Smillertto Do The Right Thing if you use Exporter:
730b39c5158Smillert
731b39c5158Smillert    package Switch;
732b39c5158Smillert    use base Exporter;
733b39c5158Smillert    use Filter::Simple;
734b39c5158Smillert
735b39c5158Smillert    @EXPORT    = qw(switch case);
736b39c5158Smillert    @EXPORT_OK = qw(given  when);
737b39c5158Smillert
738b39c5158Smillert    FILTER { $_ = magic_Perl_filter($_) }
739b39c5158Smillert
740b39c5158SmillertImmediately after the filter has been applied to the source,
741b39c5158SmillertFilter::Simple will pass control to Exporter, so it can do its magic too.
742b39c5158Smillert
743b39c5158SmillertOf course, here too, Filter::Simple has to know you're using Exporter
744b39c5158Smillertbefore it applies the filter. That's almost never a problem, but if you're
745b39c5158Smillertnervous about it, you can guarantee that things will work correctly by
746b39c5158Smillertensuring that your C<use base Exporter> always precedes your
747b39c5158SmillertC<use Filter::Simple>.
748b39c5158Smillert
749b39c5158Smillert
750b39c5158Smillert=head2 How it works
751b39c5158Smillert
752b39c5158SmillertThe Filter::Simple module exports into the package that calls C<FILTER>
753b39c5158Smillert(or C<use>s it directly) -- such as package "BANG" in the above example --
754b39c5158Smillerttwo automagically constructed
755b39c5158Smillertsubroutines -- C<import> and C<unimport> -- which take care of all the
756b39c5158Smillertnasty details.
757b39c5158Smillert
758b39c5158SmillertIn addition, the generated C<import> subroutine passes its own argument
759b39c5158Smillertlist to the filtering subroutine, so the BANG.pm filter could easily
760b39c5158Smillertbe made parametric:
761b39c5158Smillert
762b39c5158Smillert    package BANG;
763b39c5158Smillert
764b39c5158Smillert    use Filter::Simple;
765b39c5158Smillert
766b39c5158Smillert    FILTER {
767b39c5158Smillert        my ($die_msg, $var_name) = @_;
768b39c5158Smillert        s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
769b39c5158Smillert    };
770b39c5158Smillert
771b39c5158Smillert    # and in some user code:
772b39c5158Smillert
773b39c5158Smillert    use BANG "BOOM", "BAM";  # "BANG BANG" becomes: die 'BOOM' if $BAM
774b39c5158Smillert
775b39c5158Smillert
776b39c5158SmillertThe specified filtering subroutine is called every time a C<use BANG> is
777b39c5158Smillertencountered, and passed all the source code following that call, up to
778b39c5158Smillerteither the next C<no BANG;> (or whatever terminator you've set) or the
779b39c5158Smillertend of the source file, whichever occurs first. By default, any C<no
780b39c5158SmillertBANG;> call must appear by itself on a separate line, or it is ignored.
781b39c5158Smillert
782b39c5158Smillert
783b39c5158Smillert=head1 AUTHOR
784b39c5158Smillert
785b39c5158SmillertDamian Conway
786b39c5158Smillert
787b39c5158Smillert=head1 CONTACT
788b39c5158Smillert
789b39c5158SmillertFilter::Simple is now maintained by the Perl5-Porters.
790b39c5158SmillertPlease submit bug via the C<perlbug> tool that comes with your perl.
791b39c5158SmillertFor usage instructions, read C<perldoc perlbug> or possibly C<man perlbug>.
792b39c5158SmillertFor mostly anything else, please contact E<lt>perl5-porters@perl.orgE<gt>.
793b39c5158Smillert
794b39c5158SmillertMaintainer of the CPAN release is Steffen Mueller E<lt>smueller@cpan.orgE<gt>.
795b39c5158SmillertContact him with technical difficulties with respect to the packaging of the
796b39c5158SmillertCPAN module.
797b39c5158Smillert
798b39c5158SmillertPraise of the module, flowers, and presents still go to the author,
799b39c5158SmillertDamian Conway E<lt>damian@conway.orgE<gt>.
800b39c5158Smillert
801b39c5158Smillert=head1 COPYRIGHT AND LICENSE
802b39c5158Smillert
8036fb12b70Safresh1    Copyright (c) 2000-2014, Damian Conway. All Rights Reserved.
804b39c5158Smillert    This module is free software. It may be used, redistributed
805b39c5158Smillert    and/or modified under the same terms as Perl itself.
806