1b39c5158Smillertpackage Filter::Simple; 2b39c5158Smillert 3b39c5158Smillertuse Text::Balanced ':ALL'; 4b39c5158Smillert 5*56d68f1eSafresh1our $VERSION = '0.96'; 6b39c5158Smillert 7b39c5158Smillertuse Filter::Util::Call; 8b39c5158Smillertuse Carp; 9b39c5158Smillert 109f11ffb7Safresh1our @EXPORT = qw( FILTER FILTER_ONLY ); 11b39c5158Smillert 12b39c5158Smillert 13b39c5158Smillertsub import { 14b39c5158Smillert if (@_>1) { shift; goto &FILTER } 15b39c5158Smillert else { *{caller()."::$_"} = \&$_ foreach @EXPORT } 16b39c5158Smillert} 17b39c5158Smillert 18b39c5158Smillertsub fail { 19b39c5158Smillert croak "FILTER_ONLY: ", @_; 20b39c5158Smillert} 21b39c5158Smillert 22b39c5158Smillertmy $exql = sub { 23b39c5158Smillert my @bits = extract_quotelike $_[0], qr//; 24b39c5158Smillert return unless $bits[0]; 25b39c5158Smillert return \@bits; 26b39c5158Smillert}; 27b39c5158Smillert 28b39c5158Smillertmy $ncws = qr/\s+/; 29b39c5158Smillertmy $comment = qr/(?<![\$\@%])#.*/; 30b39c5158Smillertmy $ws = qr/(?:$ncws|$comment)+/; 31b39c5158Smillertmy $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/; 32b39c5158Smillertmy $EOP = qr/\n\n|\Z/; 33b39c5158Smillertmy $CUT = qr/\n=cut.*$EOP/; 34b39c5158Smillertmy $pod_or_DATA = qr/ 35b39c5158Smillert ^=(?:head[1-4]|item) .*? $CUT 36b39c5158Smillert | ^=pod .*? $CUT 37898184e3Ssthen | ^=for .*? $CUT 38898184e3Ssthen | ^=begin .*? $CUT 39b39c5158Smillert | ^__(DATA|END)__\r?\n.* 40b39c5158Smillert /smx; 41898184e3Ssthenmy $variable = qr{ 42898184e3Ssthen [\$*\@%]\s* 43898184e3Ssthen \{\s*(?!::)(?:\d+|[][&`'#+*./|,";%=~:?!\@<>()-]|\^[A-Z]?)\} 44898184e3Ssthen | (?:\$#?|[*\@\%]|\\&)\$*\s* 45898184e3Ssthen (?: \{\s*(?:\^(?=[A-Z_]))?(?:\w|::|'\w)*\s*\} 46898184e3Ssthen | (?:\^(?=[A-Z_]))?(?:\w|::|'\w)* 47898184e3Ssthen | (?=\{) # ${ block } 48898184e3Ssthen ) 49898184e3Ssthen ) 50898184e3Ssthen | \$\s*(?!::)(?:\d+|[][&`'#+*./|,";%=~:?!\@<>()-]|\^[A-Z]?) 51898184e3Ssthen }x; 52b39c5158Smillert 53b39c5158Smillertmy %extractor_for = ( 54898184e3Ssthen quotelike => [ $ws, $variable, $id, { MATCH => \&extract_quotelike } ], 55b39c5158Smillert regex => [ $ws, $pod_or_DATA, $id, $exql ], 56b39c5158Smillert string => [ $ws, $pod_or_DATA, $id, $exql ], 57898184e3Ssthen code => [ $ws, { DONT_MATCH => $pod_or_DATA }, $variable, 58b39c5158Smillert $id, { DONT_MATCH => \&extract_quotelike } ], 59b39c5158Smillert code_no_comments 60b39c5158Smillert => [ { DONT_MATCH => $comment }, 61898184e3Ssthen $ncws, { DONT_MATCH => $pod_or_DATA }, $variable, 62b39c5158Smillert $id, { DONT_MATCH => \&extract_quotelike } ], 63b39c5158Smillert executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ], 64b39c5158Smillert executable_no_comments 65b39c5158Smillert => [ { DONT_MATCH => $comment }, 66b39c5158Smillert $ncws, { DONT_MATCH => $pod_or_DATA } ], 67b39c5158Smillert all => [ { MATCH => qr/(?s:.*)/ } ], 68b39c5158Smillert); 69b39c5158Smillert 70b39c5158Smillertmy %selector_for = ( 71b39c5158Smillert all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} }, 72b39c5158Smillert executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, 73*56d68f1eSafresh1 executable_no_comments=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, 74b39c5158Smillert quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} }, 75b39c5158Smillert regex => sub { my ($t)=@_; 76b39c5158Smillert sub{ref() or return $_; 77b39c5158Smillert my ($ql,undef,$pre,$op,$ld,$pat) = @$_; 78b39c5158Smillert return $_->[0] unless $op =~ /^(qr|m|s)/ 79b39c5158Smillert || !$op && ($ld eq '/' || $ld eq '?'); 80b39c5158Smillert $_ = $pat; 81b39c5158Smillert $t->(@_); 82b39c5158Smillert $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/; 83b39c5158Smillert return "$pre$ql"; 84b39c5158Smillert }; 85b39c5158Smillert }, 86b39c5158Smillert string => sub { my ($t)=@_; 87b39c5158Smillert sub{ref() or return $_; 88b39c5158Smillert local *args = \@_; 89b39c5158Smillert my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10]; 90b39c5158Smillert return $_->[0] if $op =~ /^(qr|m)/ 91b39c5158Smillert || !$op && ($ld1 eq '/' || $ld1 eq '?'); 92b39c5158Smillert if (!$op || $op eq 'tr' || $op eq 'y') { 93b39c5158Smillert local *_ = \$str1; 94b39c5158Smillert $t->(@args); 95b39c5158Smillert } 96b39c5158Smillert if ($op =~ /^(tr|y|s)/) { 97b39c5158Smillert local *_ = \$str2; 98b39c5158Smillert $t->(@args); 99b39c5158Smillert } 100b39c5158Smillert my $result = "$pre$op$ld1$str1$rd1"; 101b39c5158Smillert $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}> 102b39c5158Smillert $result .= "$str2$rd2$flg"; 103b39c5158Smillert return $result; 104b39c5158Smillert }; 105b39c5158Smillert }, 106b39c5158Smillert); 107b39c5158Smillert 108b39c5158Smillert 109b39c5158Smillertsub gen_std_filter_for { 110b39c5158Smillert my ($type, $transform) = @_; 111b39c5158Smillert return sub { 112b39c5158Smillert my $instr; 113b39c5158Smillert local @components; 114b39c5158Smillert for (extract_multiple($_,$extractor_for{$type})) { 115b39c5158Smillert if (ref()) { push @components, $_; $instr=0 } 116b39c5158Smillert elsif ($instr) { $components[-1] .= $_ } 117b39c5158Smillert else { push @components, $_; $instr=1 } 118b39c5158Smillert } 119b39c5158Smillert if ($type =~ /^code/) { 120b39c5158Smillert my $count = 0; 121b8851fccSafresh1 local $placeholder = qr/\Q$;\E(.{4})\Q$;\E/s; 122b8851fccSafresh1 my $extractor = qr/\Q$;\E(.{4})\Q$;\E/s; 123b39c5158Smillert $_ = join "", 124b39c5158Smillert map { ref $_ ? $;.pack('N',$count++).$; : $_ } 125b39c5158Smillert @components; 126b39c5158Smillert @components = grep { ref $_ } @components; 127b39c5158Smillert $transform->(@_); 128b39c5158Smillert s/$extractor/${$components[unpack('N',$1)]}/g; 129b39c5158Smillert } 130b39c5158Smillert else { 131b39c5158Smillert my $selector = $selector_for{$type}->($transform); 132b39c5158Smillert $_ = join "", map $selector->(@_), @components; 133b39c5158Smillert } 134b39c5158Smillert } 135b39c5158Smillert}; 136b39c5158Smillert 137b39c5158Smillertsub FILTER (&;$) { 138b39c5158Smillert my $caller = caller; 139b39c5158Smillert my ($filter, $terminator) = @_; 140b39c5158Smillert no warnings 'redefine'; 141b39c5158Smillert *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator); 142b39c5158Smillert *{"${caller}::unimport"} = gen_filter_unimport($caller); 143b39c5158Smillert} 144b39c5158Smillert 145b39c5158Smillertsub FILTER_ONLY { 146b39c5158Smillert my $caller = caller; 147b39c5158Smillert while (@_ > 1) { 148b39c5158Smillert my ($what, $how) = splice(@_, 0, 2); 149b39c5158Smillert fail "Unknown selector: $what" 150b39c5158Smillert unless exists $extractor_for{$what}; 151b39c5158Smillert fail "Filter for $what is not a subroutine reference" 152b39c5158Smillert unless ref $how eq 'CODE'; 153b39c5158Smillert push @transforms, gen_std_filter_for($what,$how); 154b39c5158Smillert } 155b39c5158Smillert my $terminator = shift; 156b39c5158Smillert 157b39c5158Smillert my $multitransform = sub { 158b39c5158Smillert foreach my $transform ( @transforms ) { 159b39c5158Smillert $transform->(@_); 160b39c5158Smillert } 161b39c5158Smillert }; 162b39c5158Smillert no warnings 'redefine'; 163b39c5158Smillert *{"${caller}::import"} = 164b39c5158Smillert gen_filter_import($caller,$multitransform,$terminator); 165b39c5158Smillert *{"${caller}::unimport"} = gen_filter_unimport($caller); 166b39c5158Smillert} 167b39c5158Smillert 168b39c5158Smillertmy $ows = qr/(?:[ \t]+|#[^\n]*)*/; 169b39c5158Smillert 170b39c5158Smillertsub gen_filter_import { 171b39c5158Smillert my ($class, $filter, $terminator) = @_; 172b39c5158Smillert my %terminator; 173b39c5158Smillert my $prev_import = *{$class."::import"}{CODE}; 174b39c5158Smillert return sub { 175b39c5158Smillert my ($imported_class, @args) = @_; 176b39c5158Smillert my $def_terminator = 177b39c5158Smillert qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/; 178b39c5158Smillert if (!defined $terminator) { 179b39c5158Smillert $terminator{terminator} = $def_terminator; 180b39c5158Smillert } 181b39c5158Smillert elsif (!ref $terminator || ref $terminator eq 'Regexp') { 182b39c5158Smillert $terminator{terminator} = $terminator; 183b39c5158Smillert } 184b39c5158Smillert elsif (ref $terminator ne 'HASH') { 185b39c5158Smillert croak "Terminator must be specified as scalar or hash ref" 186b39c5158Smillert } 187b39c5158Smillert elsif (!exists $terminator->{terminator}) { 188b39c5158Smillert $terminator{terminator} = $def_terminator; 189b39c5158Smillert } 190b39c5158Smillert filter_add( 191b39c5158Smillert sub { 192b39c5158Smillert my ($status, $lastline); 193b39c5158Smillert my $count = 0; 194b39c5158Smillert my $data = ""; 195b39c5158Smillert while ($status = filter_read()) { 196b39c5158Smillert return $status if $status < 0; 197b39c5158Smillert if ($terminator{terminator} && 198b39c5158Smillert m/$terminator{terminator}/) { 199b39c5158Smillert $lastline = $_; 2009f11ffb7Safresh1 $count++; 201b39c5158Smillert last; 202b39c5158Smillert } 203b39c5158Smillert $data .= $_; 204b39c5158Smillert $count++; 205b39c5158Smillert $_ = ""; 206b39c5158Smillert } 207b39c5158Smillert return $count if not $count; 208b39c5158Smillert $_ = $data; 209b39c5158Smillert $filter->($imported_class, @args) unless $status < 0; 210b39c5158Smillert if (defined $lastline) { 211b39c5158Smillert if (defined $terminator{becomes}) { 212b39c5158Smillert $_ .= $terminator{becomes}; 213b39c5158Smillert } 214b39c5158Smillert elsif ($lastline =~ $def_terminator) { 215b39c5158Smillert $_ .= $lastline; 216b39c5158Smillert } 217b39c5158Smillert } 218b39c5158Smillert return $count; 219b39c5158Smillert } 220b39c5158Smillert ); 221b39c5158Smillert if ($prev_import) { 222b39c5158Smillert goto &$prev_import; 223b39c5158Smillert } 224b39c5158Smillert elsif ($class->isa('Exporter')) { 225b39c5158Smillert $class->export_to_level(1,@_); 226b39c5158Smillert } 227b39c5158Smillert } 228b39c5158Smillert} 229b39c5158Smillert 230b39c5158Smillertsub gen_filter_unimport { 231b39c5158Smillert my ($class) = @_; 232b39c5158Smillert return sub { 233b39c5158Smillert filter_del(); 234b39c5158Smillert goto &$prev_unimport if $prev_unimport; 235b39c5158Smillert } 236b39c5158Smillert} 237b39c5158Smillert 238b39c5158Smillert1; 239b39c5158Smillert 240b39c5158Smillert__END__ 241b39c5158Smillert 242b39c5158Smillert=head1 NAME 243b39c5158Smillert 244b39c5158SmillertFilter::Simple - Simplified source filtering 245b39c5158Smillert 246b39c5158Smillert=head1 SYNOPSIS 247b39c5158Smillert 248b39c5158Smillert # in MyFilter.pm: 249b39c5158Smillert 250b39c5158Smillert package MyFilter; 251b39c5158Smillert 252b39c5158Smillert use Filter::Simple; 253b39c5158Smillert 254b39c5158Smillert FILTER { ... }; 255b39c5158Smillert 256b39c5158Smillert # or just: 257b39c5158Smillert # 258b39c5158Smillert # use Filter::Simple sub { ... }; 259b39c5158Smillert 260b39c5158Smillert # in user's code: 261b39c5158Smillert 262b39c5158Smillert use MyFilter; 263b39c5158Smillert 264b39c5158Smillert # this code is filtered 265b39c5158Smillert 266b39c5158Smillert no MyFilter; 267b39c5158Smillert 268b39c5158Smillert # this code is not 269b39c5158Smillert 270b39c5158Smillert 271b39c5158Smillert=head1 DESCRIPTION 272b39c5158Smillert 273b39c5158Smillert=head2 The Problem 274b39c5158Smillert 275b39c5158SmillertSource filtering is an immensely powerful feature of recent versions of Perl. 276b39c5158SmillertIt allows one to extend the language itself (e.g. the Switch module), to 277b39c5158Smillertsimplify the language (e.g. Language::Pythonesque), or to completely recast the 278b39c5158Smillertlanguage (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use 279b39c5158Smillertthe full power of Perl as its own, recursively applied, macro language. 280b39c5158Smillert 281b39c5158SmillertThe excellent Filter::Util::Call module (by Paul Marquess) provides a 282b39c5158Smillertusable Perl interface to source filtering, but it is often too powerful 283b39c5158Smillertand not nearly as simple as it could be. 284b39c5158Smillert 285b39c5158SmillertTo use the module it is necessary to do the following: 286b39c5158Smillert 287b39c5158Smillert=over 4 288b39c5158Smillert 289b39c5158Smillert=item 1. 290b39c5158Smillert 291b39c5158SmillertDownload, build, and install the Filter::Util::Call module. 292b39c5158Smillert(If you have Perl 5.7.1 or later, this is already done for you.) 293b39c5158Smillert 294b39c5158Smillert=item 2. 295b39c5158Smillert 296b39c5158SmillertSet up a module that does a C<use Filter::Util::Call>. 297b39c5158Smillert 298b39c5158Smillert=item 3. 299b39c5158Smillert 300b39c5158SmillertWithin that module, create an C<import> subroutine. 301b39c5158Smillert 302b39c5158Smillert=item 4. 303b39c5158Smillert 304b39c5158SmillertWithin the C<import> subroutine do a call to C<filter_add>, passing 305b39c5158Smillertit either a subroutine reference. 306b39c5158Smillert 307b39c5158Smillert=item 5. 308b39c5158Smillert 309b39c5158SmillertWithin the subroutine reference, call C<filter_read> or C<filter_read_exact> 310b39c5158Smillertto "prime" $_ with source code data from the source file that will 311b39c5158SmillertC<use> your module. Check the status value returned to see if any 312b39c5158Smillertsource code was actually read in. 313b39c5158Smillert 314b39c5158Smillert=item 6. 315b39c5158Smillert 316b39c5158SmillertProcess the contents of $_ to change the source code in the desired manner. 317b39c5158Smillert 318b39c5158Smillert=item 7. 319b39c5158Smillert 320b39c5158SmillertReturn the status value. 321b39c5158Smillert 322b39c5158Smillert=item 8. 323b39c5158Smillert 324b39c5158SmillertIf the act of unimporting your module (via a C<no>) should cause source 325b39c5158Smillertcode filtering to cease, create an C<unimport> subroutine, and have it call 326b39c5158SmillertC<filter_del>. Make sure that the call to C<filter_read> or 327b39c5158SmillertC<filter_read_exact> in step 5 will not accidentally read past the 328b39c5158SmillertC<no>. Effectively this limits source code filters to line-by-line 329b39c5158Smillertoperation, unless the C<import> subroutine does some fancy 330b39c5158Smillertpre-pre-parsing of the source code it's filtering. 331b39c5158Smillert 332b39c5158Smillert=back 333b39c5158Smillert 334b39c5158SmillertFor example, here is a minimal source code filter in a module named 335b39c5158SmillertBANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG> 336b39c5158Smillertto the sequence C<die 'BANG' if $BANG> in any piece of code following a 337b39c5158SmillertC<use BANG;> statement (until the next C<no BANG;> statement, if any): 338b39c5158Smillert 339b39c5158Smillert package BANG; 340b39c5158Smillert 341b39c5158Smillert use Filter::Util::Call ; 342b39c5158Smillert 343b39c5158Smillert sub import { 344b39c5158Smillert filter_add( sub { 345b39c5158Smillert my $caller = caller; 346b39c5158Smillert my ($status, $no_seen, $data); 347b39c5158Smillert while ($status = filter_read()) { 348b39c5158Smillert if (/^\s*no\s+$caller\s*;\s*?$/) { 349b39c5158Smillert $no_seen=1; 350b39c5158Smillert last; 351b39c5158Smillert } 352b39c5158Smillert $data .= $_; 353b39c5158Smillert $_ = ""; 354b39c5158Smillert } 355b39c5158Smillert $_ = $data; 356b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g 357b39c5158Smillert unless $status < 0; 358b39c5158Smillert $_ .= "no $class;\n" if $no_seen; 359b39c5158Smillert return 1; 360b39c5158Smillert }) 361b39c5158Smillert } 362b39c5158Smillert 363b39c5158Smillert sub unimport { 364b39c5158Smillert filter_del(); 365b39c5158Smillert } 366b39c5158Smillert 367b39c5158Smillert 1 ; 368b39c5158Smillert 369b39c5158SmillertThis level of sophistication puts filtering out of the reach of 370b39c5158Smillertmany programmers. 371b39c5158Smillert 372b39c5158Smillert 373b39c5158Smillert=head2 A Solution 374b39c5158Smillert 375b39c5158SmillertThe Filter::Simple module provides a simplified interface to 376b39c5158SmillertFilter::Util::Call; one that is sufficient for most common cases. 377b39c5158Smillert 378b39c5158SmillertInstead of the above process, with Filter::Simple the task of setting up 379b39c5158Smillerta source code filter is reduced to: 380b39c5158Smillert 381b39c5158Smillert=over 4 382b39c5158Smillert 383b39c5158Smillert=item 1. 384b39c5158Smillert 385b39c5158SmillertDownload and install the Filter::Simple module. 386b39c5158Smillert(If you have Perl 5.7.1 or later, this is already done for you.) 387b39c5158Smillert 388b39c5158Smillert=item 2. 389b39c5158Smillert 390b39c5158SmillertSet up a module that does a C<use Filter::Simple> and then 391b39c5158Smillertcalls C<FILTER { ... }>. 392b39c5158Smillert 393b39c5158Smillert=item 3. 394b39c5158Smillert 395b39c5158SmillertWithin the anonymous subroutine or block that is passed to 396b39c5158SmillertC<FILTER>, process the contents of $_ to change the source code in 397b39c5158Smillertthe desired manner. 398b39c5158Smillert 399b39c5158Smillert=back 400b39c5158Smillert 401b39c5158SmillertIn other words, the previous example, would become: 402b39c5158Smillert 403b39c5158Smillert package BANG; 404b39c5158Smillert use Filter::Simple; 405b39c5158Smillert 406b39c5158Smillert FILTER { 407b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g; 408b39c5158Smillert }; 409b39c5158Smillert 410b39c5158Smillert 1 ; 411b39c5158Smillert 412b39c5158SmillertNote that the source code is passed as a single string, so any regex that 413b39c5158Smillertuses C<^> or C<$> to detect line boundaries will need the C</m> flag. 414b39c5158Smillert 415b39c5158Smillert=head2 Disabling or changing <no> behaviour 416b39c5158Smillert 417b39c5158SmillertBy default, the installed filter only filters up to a line consisting of one of 418b39c5158Smillertthe three standard source "terminators": 419b39c5158Smillert 420b39c5158Smillert no ModuleName; # optional comment 421b39c5158Smillert 422b39c5158Smillertor: 423b39c5158Smillert 424b39c5158Smillert __END__ 425b39c5158Smillert 426b39c5158Smillertor: 427b39c5158Smillert 428b39c5158Smillert __DATA__ 429b39c5158Smillert 430b39c5158Smillertbut this can be altered by passing a second argument to C<use Filter::Simple> 431b39c5158Smillertor C<FILTER> (just remember: there's I<no> comma after the initial block when 432b39c5158Smillertyou use C<FILTER>). 433b39c5158Smillert 434b39c5158SmillertThat second argument may be either a C<qr>'d regular expression (which is then 435b39c5158Smillertused to match the terminator line), or a defined false value (which indicates 436b39c5158Smillertthat no terminator line should be looked for), or a reference to a hash 437b39c5158Smillert(in which case the terminator is the value associated with the key 438b39c5158SmillertC<'terminator'>. 439b39c5158Smillert 440b39c5158SmillertFor example, to cause the previous filter to filter only up to a line of the 441b39c5158Smillertform: 442b39c5158Smillert 443b39c5158Smillert GNAB esu; 444b39c5158Smillert 445b39c5158Smillertyou would write: 446b39c5158Smillert 447b39c5158Smillert package BANG; 448b39c5158Smillert use Filter::Simple; 449b39c5158Smillert 450b39c5158Smillert FILTER { 451b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g; 452b39c5158Smillert } 453b39c5158Smillert qr/^\s*GNAB\s+esu\s*;\s*?$/; 454b39c5158Smillert 455b39c5158Smillertor: 456b39c5158Smillert 457b39c5158Smillert FILTER { 458b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g; 459b39c5158Smillert } 460b39c5158Smillert { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; 461b39c5158Smillert 462b39c5158Smillertand to prevent the filter's being turned off in any way: 463b39c5158Smillert 464b39c5158Smillert package BANG; 465b39c5158Smillert use Filter::Simple; 466b39c5158Smillert 467b39c5158Smillert FILTER { 468b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g; 469b39c5158Smillert } 470b39c5158Smillert ""; # or: 0 471b39c5158Smillert 472b39c5158Smillertor: 473b39c5158Smillert 474b39c5158Smillert FILTER { 475b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g; 476b39c5158Smillert } 477b39c5158Smillert { terminator => "" }; 478b39c5158Smillert 479b39c5158SmillertB<Note that, no matter what you set the terminator pattern to, 480b39c5158Smillertthe actual terminator itself I<must> be contained on a single source line.> 481b39c5158Smillert 482b39c5158Smillert 483b39c5158Smillert=head2 All-in-one interface 484b39c5158Smillert 485b39c5158SmillertSeparating the loading of Filter::Simple: 486b39c5158Smillert 487b39c5158Smillert use Filter::Simple; 488b39c5158Smillert 489b39c5158Smillertfrom the setting up of the filtering: 490b39c5158Smillert 491b39c5158Smillert FILTER { ... }; 492b39c5158Smillert 493b39c5158Smillertis useful because it allows other code (typically parser support code 494b39c5158Smillertor caching variables) to be defined before the filter is invoked. 495b39c5158SmillertHowever, there is often no need for such a separation. 496b39c5158Smillert 497b39c5158SmillertIn those cases, it is easier to just append the filtering subroutine and 498b39c5158Smillertany terminator specification directly to the C<use> statement that loads 499b39c5158SmillertFilter::Simple, like so: 500b39c5158Smillert 501b39c5158Smillert use Filter::Simple sub { 502b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g; 503b39c5158Smillert }; 504b39c5158Smillert 505b39c5158SmillertThis is exactly the same as: 506b39c5158Smillert 507b39c5158Smillert use Filter::Simple; 508b39c5158Smillert BEGIN { 509b39c5158Smillert Filter::Simple::FILTER { 510b39c5158Smillert s/BANG\s+BANG/die 'BANG' if \$BANG/g; 511b39c5158Smillert }; 512b39c5158Smillert } 513b39c5158Smillert 514b39c5158Smillertexcept that the C<FILTER> subroutine is not exported by Filter::Simple. 515b39c5158Smillert 516b39c5158Smillert 517b39c5158Smillert=head2 Filtering only specific components of source code 518b39c5158Smillert 519b39c5158SmillertOne of the problems with a filter like: 520b39c5158Smillert 521b39c5158Smillert use Filter::Simple; 522b39c5158Smillert 523b39c5158Smillert FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; 524b39c5158Smillert 525b39c5158Smillertis that it indiscriminately applies the specified transformation to 526b39c5158Smillertthe entire text of your source program. So something like: 527b39c5158Smillert 528b39c5158Smillert warn 'BANG BANG, YOU'RE DEAD'; 529b39c5158Smillert BANG BANG; 530b39c5158Smillert 531b39c5158Smillertwill become: 532b39c5158Smillert 533b39c5158Smillert warn 'die 'BANG' if $BANG, YOU'RE DEAD'; 534b39c5158Smillert die 'BANG' if $BANG; 535b39c5158Smillert 536b39c5158SmillertIt is very common when filtering source to only want to apply the filter 537b39c5158Smillertto the non-character-string parts of the code, or alternatively to I<only> 538b39c5158Smillertthe character strings. 539b39c5158Smillert 540b39c5158SmillertFilter::Simple supports this type of filtering by automatically 541b39c5158Smillertexporting the C<FILTER_ONLY> subroutine. 542b39c5158Smillert 543b39c5158SmillertC<FILTER_ONLY> takes a sequence of specifiers that install separate 544b39c5158Smillert(and possibly multiple) filters that act on only parts of the source code. 545b39c5158SmillertFor example: 546b39c5158Smillert 547b39c5158Smillert use Filter::Simple; 548b39c5158Smillert 549b39c5158Smillert FILTER_ONLY 550b39c5158Smillert code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, 551b39c5158Smillert quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g }; 552b39c5158Smillert 553b39c5158SmillertThe C<"code"> subroutine will only be used to filter parts of the source 554b39c5158Smillertcode that are not quotelikes, POD, or C<__DATA__>. The C<quotelike> 555b39c5158Smillertsubroutine only filters Perl quotelikes (including here documents). 556b39c5158Smillert 557b39c5158SmillertThe full list of alternatives is: 558b39c5158Smillert 559b39c5158Smillert=over 560b39c5158Smillert 561b39c5158Smillert=item C<"code"> 562b39c5158Smillert 563b39c5158SmillertFilters only those sections of the source code that are not quotelikes, POD, or 564b39c5158SmillertC<__DATA__>. 565b39c5158Smillert 566b39c5158Smillert=item C<"code_no_comments"> 567b39c5158Smillert 568b39c5158SmillertFilters only those sections of the source code that are not quotelikes, POD, 569b39c5158Smillertcomments, or C<__DATA__>. 570b39c5158Smillert 571b39c5158Smillert=item C<"executable"> 572b39c5158Smillert 573b39c5158SmillertFilters only those sections of the source code that are not POD or C<__DATA__>. 574b39c5158Smillert 575b39c5158Smillert=item C<"executable_no_comments"> 576b39c5158Smillert 577b39c5158SmillertFilters only those sections of the source code that are not POD, comments, or C<__DATA__>. 578b39c5158Smillert 579b39c5158Smillert=item C<"quotelike"> 580b39c5158Smillert 581b39c5158SmillertFilters only Perl quotelikes (as interpreted by 582b39c5158SmillertC<&Text::Balanced::extract_quotelike>). 583b39c5158Smillert 584b39c5158Smillert=item C<"string"> 585b39c5158Smillert 586b39c5158SmillertFilters only the string literal parts of a Perl quotelike (i.e. the 587b39c5158Smillertcontents of a string literal, either half of a C<tr///>, the second 588b39c5158Smillerthalf of an C<s///>). 589b39c5158Smillert 590b39c5158Smillert=item C<"regex"> 591b39c5158Smillert 592b39c5158SmillertFilters only the pattern literal parts of a Perl quotelike (i.e. the 593b39c5158Smillertcontents of a C<qr//> or an C<m//>, the first half of an C<s///>). 594b39c5158Smillert 595b39c5158Smillert=item C<"all"> 596b39c5158Smillert 597b39c5158SmillertFilters everything. Identical in effect to C<FILTER>. 598b39c5158Smillert 599b39c5158Smillert=back 600b39c5158Smillert 601b39c5158SmillertExcept for C<< FILTER_ONLY code => sub {...} >>, each of 602b39c5158Smillertthe component filters is called repeatedly, once for each component 603b39c5158Smillertfound in the source code. 604b39c5158Smillert 605b39c5158SmillertNote that you can also apply two or more of the same type of filter in 606b39c5158Smillerta single C<FILTER_ONLY>. For example, here's a simple 607b39c5158Smillertmacro-preprocessor that is only applied within regexes, 608b39c5158Smillertwith a final debugging pass that prints the resulting source code: 609b39c5158Smillert 610b39c5158Smillert use Regexp::Common; 611b39c5158Smillert FILTER_ONLY 612b39c5158Smillert regex => sub { s/!\[/[^/g }, 613b39c5158Smillert regex => sub { s/%d/$RE{num}{int}/g }, 614b39c5158Smillert regex => sub { s/%f/$RE{num}{real}/g }, 615b39c5158Smillert all => sub { print if $::DEBUG }; 616b39c5158Smillert 617b39c5158Smillert 618b39c5158Smillert 619b39c5158Smillert=head2 Filtering only the code parts of source code 620b39c5158Smillert 621b39c5158SmillertMost source code ceases to be grammatically correct when it is broken up 622b39c5158Smillertinto the pieces between string literals and regexes. So the C<'code'> 623b39c5158Smillertand C<'code_no_comments'> component filter behave slightly differently 624b39c5158Smillertfrom the other partial filters described in the previous section. 625b39c5158Smillert 626b39c5158SmillertRather than calling the specified processor on each individual piece of 627b39c5158Smillertcode (i.e. on the bits between quotelikes), the C<'code...'> partial 628b39c5158Smillertfilters operate on the entire source code, but with the quotelike bits 629b39c5158Smillert(and, in the case of C<'code_no_comments'>, the comments) "blanked out". 630b39c5158Smillert 631b39c5158SmillertThat is, a C<'code...'> filter I<replaces> each quoted string, quotelike, 632b39c5158Smillertregex, POD, and __DATA__ section with a placeholder. The 633b39c5158Smillertdelimiters of this placeholder are the contents of the C<$;> variable 634b39c5158Smillertat the time the filter is applied (normally C<"\034">). The remaining 635b39c5158Smillertfour bytes are a unique identifier for the component being replaced. 636b39c5158Smillert 637b39c5158SmillertThis approach makes it comparatively easy to write code preprocessors 638b39c5158Smillertwithout worrying about the form or contents of strings, regexes, etc. 639b39c5158Smillert 640b39c5158SmillertFor convenience, during a C<'code...'> filtering operation, Filter::Simple 641b39c5158Smillertprovides a package variable (C<$Filter::Simple::placeholder>) that 642b39c5158Smillertcontains a pre-compiled regex that matches any placeholder...and 643b39c5158Smillertcaptures the identifier within the placeholder. Placeholders can be 644b39c5158Smillertmoved and re-ordered within the source code as needed. 645b39c5158Smillert 646b39c5158SmillertIn addition, a second package variable (C<@Filter::Simple::components>) 647b39c5158Smillertcontains a list of the various pieces of C<$_>, as they were originally split 648b39c5158Smillertup to allow placeholders to be inserted. 649b39c5158Smillert 650b39c5158SmillertOnce the filtering has been applied, the original strings, regexes, POD, 651b39c5158Smillertetc. are re-inserted into the code, by replacing each placeholder with 652b39c5158Smillertthe corresponding original component (from C<@components>). Note that 653b39c5158Smillertthis means that the C<@components> variable must be treated with extreme 654b39c5158Smillertcare within the filter. The C<@components> array stores the "back- 655b39c5158Smillerttranslations" of each placeholder inserted into C<$_>, as well as the 656b39c5158Smillertinterstitial source code between placeholders. If the placeholder 657b39c5158Smillertbacktranslations are altered in C<@components>, they will be similarly 658b39c5158Smillertchanged when the placeholders are removed from C<$_> after the filter 659b39c5158Smillertis complete. 660b39c5158Smillert 661b39c5158SmillertFor example, the following filter detects concatenated pairs of 662b39c5158Smillertstrings/quotelikes and reverses the order in which they are 663b39c5158Smillertconcatenated: 664b39c5158Smillert 665b39c5158Smillert package DemoRevCat; 666b39c5158Smillert use Filter::Simple; 667b39c5158Smillert 668b39c5158Smillert FILTER_ONLY code => sub { 669b39c5158Smillert my $ph = $Filter::Simple::placeholder; 670b39c5158Smillert s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx 671b39c5158Smillert }; 672b39c5158Smillert 673b39c5158SmillertThus, the following code: 674b39c5158Smillert 675b39c5158Smillert use DemoRevCat; 676b39c5158Smillert 677b39c5158Smillert my $str = "abc" . q(def); 678b39c5158Smillert 679b39c5158Smillert print "$str\n"; 680b39c5158Smillert 681b39c5158Smillertwould become: 682b39c5158Smillert 683b39c5158Smillert my $str = q(def)."abc"; 684b39c5158Smillert 685b39c5158Smillert print "$str\n"; 686b39c5158Smillert 687b39c5158Smillertand hence print: 688b39c5158Smillert 689b39c5158Smillert defabc 690b39c5158Smillert 691b39c5158Smillert 692b39c5158Smillert=head2 Using Filter::Simple with an explicit C<import> subroutine 693b39c5158Smillert 694b39c5158SmillertFilter::Simple generates a special C<import> subroutine for 695b39c5158Smillertyour module (see L<"How it works">) which would normally replace any 696b39c5158SmillertC<import> subroutine you might have explicitly declared. 697b39c5158Smillert 698b39c5158SmillertHowever, Filter::Simple is smart enough to notice your existing 699b39c5158SmillertC<import> and Do The Right Thing with it. 700b39c5158SmillertThat is, if you explicitly define an C<import> subroutine in a package 701b39c5158Smillertthat's using Filter::Simple, that C<import> subroutine will still 702b39c5158Smillertbe invoked immediately after any filter you install. 703b39c5158Smillert 704b39c5158SmillertThe only thing you have to remember is that the C<import> subroutine 705b39c5158SmillertI<must> be declared I<before> the filter is installed. If you use C<FILTER> 706b39c5158Smillertto install the filter: 707b39c5158Smillert 708b39c5158Smillert package Filter::TurnItUpTo11; 709b39c5158Smillert 710b39c5158Smillert use Filter::Simple; 711b39c5158Smillert 712b39c5158Smillert FILTER { s/(\w+)/\U$1/ }; 713b39c5158Smillert 714b39c5158Smillertthat will almost never be a problem, but if you install a filtering 715b39c5158Smillertsubroutine by passing it directly to the C<use Filter::Simple> 716b39c5158Smillertstatement: 717b39c5158Smillert 718b39c5158Smillert package Filter::TurnItUpTo11; 719b39c5158Smillert 720b39c5158Smillert use Filter::Simple sub{ s/(\w+)/\U$1/ }; 721b39c5158Smillert 722b39c5158Smillertthen you must make sure that your C<import> subroutine appears before 723b39c5158Smillertthat C<use> statement. 724b39c5158Smillert 725b39c5158Smillert 726b39c5158Smillert=head2 Using Filter::Simple and Exporter together 727b39c5158Smillert 728b39c5158SmillertLikewise, Filter::Simple is also smart enough 729b39c5158Smillertto Do The Right Thing if you use Exporter: 730b39c5158Smillert 731b39c5158Smillert package Switch; 732b39c5158Smillert use base Exporter; 733b39c5158Smillert use Filter::Simple; 734b39c5158Smillert 735b39c5158Smillert @EXPORT = qw(switch case); 736b39c5158Smillert @EXPORT_OK = qw(given when); 737b39c5158Smillert 738b39c5158Smillert FILTER { $_ = magic_Perl_filter($_) } 739b39c5158Smillert 740b39c5158SmillertImmediately after the filter has been applied to the source, 741b39c5158SmillertFilter::Simple will pass control to Exporter, so it can do its magic too. 742b39c5158Smillert 743b39c5158SmillertOf course, here too, Filter::Simple has to know you're using Exporter 744b39c5158Smillertbefore it applies the filter. That's almost never a problem, but if you're 745b39c5158Smillertnervous about it, you can guarantee that things will work correctly by 746b39c5158Smillertensuring that your C<use base Exporter> always precedes your 747b39c5158SmillertC<use Filter::Simple>. 748b39c5158Smillert 749b39c5158Smillert 750b39c5158Smillert=head2 How it works 751b39c5158Smillert 752b39c5158SmillertThe Filter::Simple module exports into the package that calls C<FILTER> 753b39c5158Smillert(or C<use>s it directly) -- such as package "BANG" in the above example -- 754b39c5158Smillerttwo automagically constructed 755b39c5158Smillertsubroutines -- C<import> and C<unimport> -- which take care of all the 756b39c5158Smillertnasty details. 757b39c5158Smillert 758b39c5158SmillertIn addition, the generated C<import> subroutine passes its own argument 759b39c5158Smillertlist to the filtering subroutine, so the BANG.pm filter could easily 760b39c5158Smillertbe made parametric: 761b39c5158Smillert 762b39c5158Smillert package BANG; 763b39c5158Smillert 764b39c5158Smillert use Filter::Simple; 765b39c5158Smillert 766b39c5158Smillert FILTER { 767b39c5158Smillert my ($die_msg, $var_name) = @_; 768b39c5158Smillert s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g; 769b39c5158Smillert }; 770b39c5158Smillert 771b39c5158Smillert # and in some user code: 772b39c5158Smillert 773b39c5158Smillert use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM 774b39c5158Smillert 775b39c5158Smillert 776b39c5158SmillertThe specified filtering subroutine is called every time a C<use BANG> is 777b39c5158Smillertencountered, and passed all the source code following that call, up to 778b39c5158Smillerteither the next C<no BANG;> (or whatever terminator you've set) or the 779b39c5158Smillertend of the source file, whichever occurs first. By default, any C<no 780b39c5158SmillertBANG;> call must appear by itself on a separate line, or it is ignored. 781b39c5158Smillert 782b39c5158Smillert 783b39c5158Smillert=head1 AUTHOR 784b39c5158Smillert 785b39c5158SmillertDamian Conway 786b39c5158Smillert 787b39c5158Smillert=head1 CONTACT 788b39c5158Smillert 789b39c5158SmillertFilter::Simple is now maintained by the Perl5-Porters. 790b39c5158SmillertPlease submit bug via the C<perlbug> tool that comes with your perl. 791b39c5158SmillertFor usage instructions, read C<perldoc perlbug> or possibly C<man perlbug>. 792b39c5158SmillertFor mostly anything else, please contact E<lt>perl5-porters@perl.orgE<gt>. 793b39c5158Smillert 794b39c5158SmillertMaintainer of the CPAN release is Steffen Mueller E<lt>smueller@cpan.orgE<gt>. 795b39c5158SmillertContact him with technical difficulties with respect to the packaging of the 796b39c5158SmillertCPAN module. 797b39c5158Smillert 798b39c5158SmillertPraise of the module, flowers, and presents still go to the author, 799b39c5158SmillertDamian Conway E<lt>damian@conway.orgE<gt>. 800b39c5158Smillert 801b39c5158Smillert=head1 COPYRIGHT AND LICENSE 802b39c5158Smillert 8036fb12b70Safresh1 Copyright (c) 2000-2014, Damian Conway. All Rights Reserved. 804b39c5158Smillert This module is free software. It may be used, redistributed 805b39c5158Smillert and/or modified under the same terms as Perl itself. 806