1package File::GlobMapper; 2 3use strict; 4use warnings; 5use Carp; 6 7our ($CSH_GLOB); 8 9BEGIN 10{ 11 if ($] < 5.006) 12 { 13 require File::BSDGlob; import File::BSDGlob qw(:glob) ; 14 $CSH_GLOB = File::BSDGlob::GLOB_CSH() ; 15 *globber = \&File::BSDGlob::csh_glob; 16 } 17 else 18 { 19 require File::Glob; import File::Glob qw(:glob) ; 20 $CSH_GLOB = File::Glob::GLOB_CSH() ; 21 #*globber = \&File::Glob::bsd_glob; 22 *globber = \&File::Glob::csh_glob; 23 } 24} 25 26our ($Error); 27 28our ($VERSION, @EXPORT_OK); 29$VERSION = '1.001'; 30@EXPORT_OK = qw( globmap ); 31 32 33our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount); 34$noPreBS = '(?<!\\\)' ; # no preceding backslash 35$metachars = '.*?[](){}'; 36$matchMetaRE = '[' . quotemeta($metachars) . ']'; 37 38%mapping = ( 39 '*' => '([^/]*)', 40 '?' => '([^/])', 41 '.' => '\.', 42 '[' => '([', 43 '(' => '(', 44 ')' => ')', 45 ); 46 47%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /; 48 49sub globmap ($$;) 50{ 51 my $inputGlob = shift ; 52 my $outputGlob = shift ; 53 54 my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_) 55 or croak "globmap: $Error" ; 56 return $obj->getFileMap(); 57} 58 59sub new 60{ 61 my $class = shift ; 62 my $inputGlob = shift ; 63 my $outputGlob = shift ; 64 # TODO -- flags needs to default to whatever File::Glob does 65 my $flags = shift || $CSH_GLOB ; 66 #my $flags = shift ; 67 68 $inputGlob =~ s/^\s*\<\s*//; 69 $inputGlob =~ s/\s*\>\s*$//; 70 71 $outputGlob =~ s/^\s*\<\s*//; 72 $outputGlob =~ s/\s*\>\s*$//; 73 74 my %object = 75 ( InputGlob => $inputGlob, 76 OutputGlob => $outputGlob, 77 GlobFlags => $flags, 78 Braces => 0, 79 WildCount => 0, 80 Pairs => [], 81 Sigil => '#', 82 ); 83 84 my $self = bless \%object, ref($class) || $class ; 85 86 $self->_parseInputGlob() 87 or return undef ; 88 89 $self->_parseOutputGlob() 90 or return undef ; 91 92 my @inputFiles = globber($self->{InputGlob}, $flags) ; 93 94 if (GLOB_ERROR) 95 { 96 $Error = $!; 97 return undef ; 98 } 99 100 #if (whatever) 101 { 102 my $missing = grep { ! -e $_ } @inputFiles ; 103 104 if ($missing) 105 { 106 $Error = "$missing input files do not exist"; 107 return undef ; 108 } 109 } 110 111 $self->{InputFiles} = \@inputFiles ; 112 113 $self->_getFiles() 114 or return undef ; 115 116 return $self; 117} 118 119sub _retError 120{ 121 my $string = shift ; 122 $Error = "$string in input fileglob" ; 123 return undef ; 124} 125 126sub _unmatched 127{ 128 my $delimeter = shift ; 129 130 _retError("Unmatched $delimeter"); 131 return undef ; 132} 133 134sub _parseBit 135{ 136 my $self = shift ; 137 138 my $string = shift ; 139 140 my $out = ''; 141 my $depth = 0 ; 142 143 while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//) 144 { 145 $out .= quotemeta($1) ; 146 $out .= $mapping{$2} if defined $mapping{$2}; 147 148 ++ $self->{WildCount} if $wildCount{$2} ; 149 150 if ($2 eq ',') 151 { 152 return _unmatched("(") 153 if $depth ; 154 155 $out .= '|'; 156 } 157 elsif ($2 eq '(') 158 { 159 ++ $depth ; 160 } 161 elsif ($2 eq ')') 162 { 163 return _unmatched(")") 164 if ! $depth ; 165 166 -- $depth ; 167 } 168 elsif ($2 eq '[') 169 { 170 # TODO -- quotemeta & check no '/' 171 # TODO -- check for \] & other \ within the [] 172 $string =~ s#(.*?\])## 173 or return _unmatched("["); 174 $out .= "$1)" ; 175 } 176 elsif ($2 eq ']') 177 { 178 return _unmatched("]"); 179 } 180 elsif ($2 eq '{' || $2 eq '}') 181 { 182 return _retError("Nested {} not allowed"); 183 } 184 } 185 186 $out .= quotemeta $string; 187 188 return _unmatched("(") 189 if $depth ; 190 191 return $out ; 192} 193 194sub _parseInputGlob 195{ 196 my $self = shift ; 197 198 my $string = $self->{InputGlob} ; 199 my $inGlob = ''; 200 201 # Multiple concatenated *'s don't make sense 202 #$string =~ s#\*\*+#*# ; 203 204 # TODO -- Allow space to delimit patterns? 205 #my @strings = split /\s+/, $string ; 206 #for my $str (@strings) 207 my $out = ''; 208 my $depth = 0 ; 209 210 while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//) 211 { 212 $out .= quotemeta($1) ; 213 $out .= $mapping{$2} if defined $mapping{$2}; 214 ++ $self->{WildCount} if $wildCount{$2} ; 215 216 if ($2 eq '(') 217 { 218 ++ $depth ; 219 } 220 elsif ($2 eq ')') 221 { 222 return _unmatched(")") 223 if ! $depth ; 224 225 -- $depth ; 226 } 227 elsif ($2 eq '[') 228 { 229 # TODO -- quotemeta & check no '/' or '(' or ')' 230 # TODO -- check for \] & other \ within the [] 231 $string =~ s#(.*?\])## 232 or return _unmatched("["); 233 $out .= "$1)" ; 234 } 235 elsif ($2 eq ']') 236 { 237 return _unmatched("]"); 238 } 239 elsif ($2 eq '}') 240 { 241 return _unmatched("}"); 242 } 243 elsif ($2 eq '{') 244 { 245 # TODO -- check no '/' within the {} 246 # TODO -- check for \} & other \ within the {} 247 248 my $tmp ; 249 unless ( $string =~ s/(.*?)$noPreBS\}//) 250 { 251 return _unmatched("{"); 252 } 253 #$string =~ s#(.*?)\}##; 254 255 #my $alt = join '|', 256 # map { quotemeta $_ } 257 # split "$noPreBS,", $1 ; 258 my $alt = $self->_parseBit($1); 259 defined $alt or return 0 ; 260 $out .= "($alt)" ; 261 262 ++ $self->{Braces} ; 263 } 264 } 265 266 return _unmatched("(") 267 if $depth ; 268 269 $out .= quotemeta $string ; 270 271 272 $self->{InputGlob} =~ s/$noPreBS[\(\)]//g; 273 $self->{InputPattern} = $out ; 274 275 #print "# INPUT '$self->{InputGlob}' => '$out'\n"; 276 277 return 1 ; 278 279} 280 281sub _parseOutputGlob 282{ 283 my $self = shift ; 284 285 my $string = $self->{OutputGlob} ; 286 my $maxwild = $self->{WildCount}; 287 288 if ($self->{GlobFlags} & GLOB_TILDE) 289 #if (1) 290 { 291 $string =~ s{ 292 ^ ~ # find a leading tilde 293 ( # save this in $1 294 [^/] # a non-slash character 295 * # repeated 0 or more times (0 means me) 296 ) 297 }{ 298 $1 299 ? (getpwnam($1))[7] 300 : ( $ENV{HOME} || $ENV{LOGDIR} ) 301 }ex; 302 303 } 304 305 # max #1 must be == to max no of '*' in input 306 while ( $string =~ m/#(\d)/g ) 307 { 308 croak "Max wild is #$maxwild, you tried #$1" 309 if $1 > $maxwild ; 310 } 311 312 my $noPreBS = '(?<!\\\)' ; # no preceding backslash 313 #warn "noPreBS = '$noPreBS'\n"; 314 315 #$string =~ s/${noPreBS}\$(\d)/\${$1}/g; 316 $string =~ s/${noPreBS}#(\d)/\${$1}/g; 317 $string =~ s#${noPreBS}\*#\${inFile}#g; 318 $string = '"' . $string . '"'; 319 320 #print "OUTPUT '$self->{OutputGlob}' => '$string'\n"; 321 $self->{OutputPattern} = $string ; 322 323 return 1 ; 324} 325 326sub _getFiles 327{ 328 my $self = shift ; 329 330 my %outInMapping = (); 331 my %inFiles = () ; 332 333 foreach my $inFile (@{ $self->{InputFiles} }) 334 { 335 next if $inFiles{$inFile} ++ ; 336 337 my $outFile = $inFile ; 338 339 if ( $inFile =~ m/$self->{InputPattern}/ ) 340 { 341 no warnings 'uninitialized'; 342 eval "\$outFile = $self->{OutputPattern};" ; 343 344 if (defined $outInMapping{$outFile}) 345 { 346 $Error = "multiple input files map to one output file"; 347 return undef ; 348 } 349 $outInMapping{$outFile} = $inFile; 350 push @{ $self->{Pairs} }, [$inFile, $outFile]; 351 } 352 } 353 354 return 1 ; 355} 356 357sub getFileMap 358{ 359 my $self = shift ; 360 361 return $self->{Pairs} ; 362} 363 364sub getHash 365{ 366 my $self = shift ; 367 368 return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ; 369} 370 3711; 372 373__END__ 374 375=head1 NAME 376 377File::GlobMapper - Extend File Glob to Allow Input and Output Files 378 379=head1 SYNOPSIS 380 381 use File::GlobMapper qw( globmap ); 382 383 my $aref = globmap $input => $output 384 or die $File::GlobMapper::Error ; 385 386 my $gm = new File::GlobMapper $input => $output 387 or die $File::GlobMapper::Error ; 388 389 390=head1 DESCRIPTION 391 392This module needs Perl5.005 or better. 393 394This module takes the existing C<File::Glob> module as a starting point and 395extends it to allow new filenames to be derived from the files matched by 396C<File::Glob>. 397 398This can be useful when carrying out batch operations on multiple files that 399have both an input filename and output filename and the output file can be 400derived from the input filename. Examples of operations where this can be 401useful include, file renaming, file copying and file compression. 402 403 404=head2 Behind The Scenes 405 406To help explain what C<File::GlobMapper> does, consider what code you 407would write if you wanted to rename all files in the current directory 408that ended in C<.tar.gz> to C<.tgz>. So say these files are in the 409current directory 410 411 alpha.tar.gz 412 beta.tar.gz 413 gamma.tar.gz 414 415and they need renamed to this 416 417 alpha.tgz 418 beta.tgz 419 gamma.tgz 420 421Below is a possible implementation of a script to carry out the rename 422(error cases have been omitted) 423 424 foreach my $old ( glob "*.tar.gz" ) 425 { 426 my $new = $old; 427 $new =~ s#(.*)\.tar\.gz$#$1.tgz# ; 428 429 rename $old => $new 430 or die "Cannot rename '$old' to '$new': $!\n; 431 } 432 433Notice that a file glob pattern C<*.tar.gz> was used to match the 434C<.tar.gz> files, then a fairly similar regular expression was used in 435the substitute to allow the new filename to be created. 436 437Given that the file glob is just a cut-down regular expression and that it 438has already done a lot of the hard work in pattern matching the filenames, 439wouldn't it be handy to be able to use the patterns in the fileglob to 440drive the new filename? 441 442Well, that's I<exactly> what C<File::GlobMapper> does. 443 444Here is same snippet of code rewritten using C<globmap> 445 446 for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' ) 447 { 448 my ($from, $to) = @$pair; 449 rename $from => $to 450 or die "Cannot rename '$old' to '$new': $!\n; 451 } 452 453So how does it work? 454 455Behind the scenes the C<globmap> function does a combination of a 456file glob to match existing filenames followed by a substitute 457to create the new filenames. 458 459Notice how both parameters to C<globmap> are strings that are delimited by <>. 460This is done to make them look more like file globs - it is just syntactic 461sugar, but it can be handy when you want the strings to be visually 462distinctive. The enclosing <> are optional, so you don't have to use them - in 463fact the first thing globmap will do is remove these delimiters if they are 464present. 465 466The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>. 467Once the enclosing "< ... >" is removed, this is passed (more or 468less) unchanged to C<File::Glob> to carry out a file match. 469 470Next the fileglob C<*.tar.gz> is transformed behind the scenes into a 471full Perl regular expression, with the additional step of wrapping each 472transformed wildcard metacharacter sequence in parenthesis. 473 474In this case the input fileglob C<*.tar.gz> will be transformed into 475this Perl regular expression 476 477 ([^/]*)\.tar\.gz 478 479Wrapping with parenthesis allows the wildcard parts of the Input File 480Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>, 481the I<Output File Glob>. This parameter operates just like the replacement 482part of a substitute command. The difference is that the C<#1> syntax 483is used to reference sub-patterns matched in the input fileglob, rather 484than the C<$1> syntax that is used with perl regular expressions. In 485this case C<#1> is used to refer to the text matched by the C<*> in the 486Input File Glob. This makes it easier to use this module where the 487parameters to C<globmap> are typed at the command line. 488 489The final step involves passing each filename matched by the C<*.tar.gz> 490file glob through the derived Perl regular expression in turn and 491expanding the output fileglob using it. 492 493The end result of all this is a list of pairs of filenames. By default 494that is what is returned by C<globmap>. In this example the data structure 495returned will look like this 496 497 ( ['alpha.tar.gz' => 'alpha.tgz'], 498 ['beta.tar.gz' => 'beta.tgz' ], 499 ['gamma.tar.gz' => 'gamma.tgz'] 500 ) 501 502 503Each pair is an array reference with two elements - namely the I<from> 504filename, that C<File::Glob> has matched, and a I<to> filename that is 505derived from the I<from> filename. 506 507 508 509=head2 Limitations 510 511C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to 512solve all filename mapping operations. Under the hood C<File::Glob> (or for 513older versions of Perl, C<File::BSDGlob>) is used to match the files, so you 514will never have the flexibility of full Perl regular expression. 515 516=head2 Input File Glob 517 518The syntax for an Input FileGlob is identical to C<File::Glob>, except 519for the following 520 521=over 5 522 523=item 1. 524 525No nested {} 526 527=item 2. 528 529Whitespace does not delimit fileglobs. 530 531=item 3. 532 533The use of parenthesis can be used to capture parts of the input filename. 534 535=item 4. 536 537If an Input glob matches the same file more than once, only the first 538will be used. 539 540=back 541 542The syntax 543 544=over 5 545 546=item B<~> 547 548=item B<~user> 549 550 551=item B<.> 552 553Matches a literal '.'. 554Equivalent to the Perl regular expression 555 556 \. 557 558=item B<*> 559 560Matches zero or more characters, except '/'. Equivalent to the Perl 561regular expression 562 563 [^/]* 564 565=item B<?> 566 567Matches zero or one character, except '/'. Equivalent to the Perl 568regular expression 569 570 [^/]? 571 572=item B<\> 573 574Backslash is used, as usual, to escape the next character. 575 576=item B<[]> 577 578Character class. 579 580=item B<{,}> 581 582Alternation 583 584=item B<()> 585 586Capturing parenthesis that work just like perl 587 588=back 589 590Any other character it taken literally. 591 592=head2 Output File Glob 593 594The Output File Glob is a normal string, with 2 glob-like features. 595 596The first is the '*' metacharacter. This will be replaced by the complete 597filename matched by the input file glob. So 598 599 *.c *.Z 600 601The second is 602 603Output FileGlobs take the 604 605=over 5 606 607=item "*" 608 609The "*" character will be replaced with the complete input filename. 610 611=item #1 612 613Patterns of the form /#\d/ will be replaced with the 614 615=back 616 617=head2 Returned Data 618 619 620=head1 EXAMPLES 621 622=head2 A Rename script 623 624Below is a simple "rename" script that uses C<globmap> to determine the 625source and destination filenames. 626 627 use File::GlobMapper qw(globmap) ; 628 use File::Copy; 629 630 die "rename: Usage rename 'from' 'to'\n" 631 unless @ARGV == 2 ; 632 633 my $fromGlob = shift @ARGV; 634 my $toGlob = shift @ARGV; 635 636 my $pairs = globmap($fromGlob, $toGlob) 637 or die $File::GlobMapper::Error; 638 639 for my $pair (@$pairs) 640 { 641 my ($from, $to) = @$pair; 642 move $from => $to ; 643 } 644 645 646 647Here is an example that renames all c files to cpp. 648 649 $ rename '*.c' '#1.cpp' 650 651=head2 A few example globmaps 652 653Below are a few examples of globmaps 654 655To copy all your .c file to a backup directory 656 657 '</my/home/*.c>' '</my/backup/#1.c>' 658 659If you want to compress all 660 661 '</my/home/*.[ch]>' '<*.gz>' 662 663To uncompress 664 665 '</my/home/*.[ch].gz>' '</my/home/#1.#2>' 666 667=head1 SEE ALSO 668 669L<File::Glob|File::Glob> 670 671=head1 AUTHOR 672 673The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>. 674 675=head1 COPYRIGHT AND LICENSE 676 677Copyright (c) 2005 Paul Marquess. All rights reserved. 678This program is free software; you can redistribute it and/or 679modify it under the same terms as Perl itself. 680