1package File::Find; 2use 5.006; 3use strict; 4use warnings; 5use warnings::register; 6our $VERSION = '1.39_01'; 7use Exporter 'import'; 8require Cwd; 9 10our @EXPORT = qw(find finddepth); 11 12 13use strict; 14my $Is_VMS = $^O eq 'VMS'; 15my $Is_Win32 = $^O eq 'MSWin32'; 16 17require File::Basename; 18require File::Spec; 19 20# Should ideally be my() not our() but local() currently 21# refuses to operate on lexicals 22 23our %SLnkSeen; 24our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 25 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 26 $pre_process, $post_process, $dangling_symlinks); 27 28sub contract_name { 29 my ($cdir,$fn) = @_; 30 31 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; 32 33 $cdir = substr($cdir,0,rindex($cdir,'/')+1); 34 35 $fn =~ s|^\./||; 36 37 my $abs_name= $cdir . $fn; 38 39 if (substr($fn,0,3) eq '../') { 40 1 while $abs_name =~ s!/[^/]*/\.\./+!/!; 41 } 42 43 return $abs_name; 44} 45 46sub PathCombine($$) { 47 my ($Base,$Name) = @_; 48 my $AbsName; 49 50 if (substr($Name,0,1) eq '/') { 51 $AbsName= $Name; 52 } 53 else { 54 $AbsName= contract_name($Base,$Name); 55 } 56 57 # (simple) check for recursion 58 my $newlen= length($AbsName); 59 if ($newlen <= length($Base)) { 60 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') 61 && $AbsName eq substr($Base,0,$newlen)) 62 { 63 return undef; 64 } 65 } 66 return $AbsName; 67} 68 69sub Follow_SymLink($) { 70 my ($AbsName) = @_; 71 72 my ($NewName,$DEV, $INO); 73 ($DEV, $INO)= lstat $AbsName; 74 75 while (-l _) { 76 if ($SLnkSeen{$DEV, $INO}++) { 77 if ($follow_skip < 2) { 78 die "$AbsName is encountered a second time"; 79 } 80 else { 81 return undef; 82 } 83 } 84 $NewName= PathCombine($AbsName, readlink($AbsName)); 85 unless(defined $NewName) { 86 if ($follow_skip < 2) { 87 die "$AbsName is a recursive symbolic link"; 88 } 89 else { 90 return undef; 91 } 92 } 93 else { 94 $AbsName= $NewName; 95 } 96 ($DEV, $INO) = lstat($AbsName); 97 return undef unless defined $DEV; # dangling symbolic link 98 } 99 100 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { 101 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { 102 die "$AbsName encountered a second time"; 103 } 104 else { 105 return undef; 106 } 107 } 108 109 return $AbsName; 110} 111 112our($dir, $name, $fullname, $prune); 113sub _find_dir_symlnk($$$); 114sub _find_dir($$$); 115 116# check whether or not a scalar variable is tainted 117# (code straight from the Camel, 3rd ed., page 561) 118sub is_tainted_pp { 119 my $arg = shift; 120 my $nada = substr($arg, 0, 0); # zero-length 121 local $@; 122 eval { eval "# $nada" }; 123 return length($@) != 0; 124} 125 126sub _find_opt { 127 my $wanted = shift; 128 return unless @_; 129 die "invalid top directory" unless defined $_[0]; 130 131 # This function must local()ize everything because callbacks may 132 # call find() or finddepth() 133 134 local %SLnkSeen; 135 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 136 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 137 $pre_process, $post_process, $dangling_symlinks); 138 local($dir, $name, $fullname, $prune); 139 local *_ = \my $a; 140 141 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); 142 if ($Is_VMS) { 143 # VMS returns this by default in VMS format which just doesn't 144 # work for the rest of this module. 145 $cwd = VMS::Filespec::unixpath($cwd); 146 147 # Apparently this is not expected to have a trailing space. 148 # To attempt to make VMS/UNIX conversions mostly reversible, 149 # a trailing slash is needed. The run-time functions ignore the 150 # resulting double slash, but it causes the perl tests to fail. 151 $cwd =~ s#/\z##; 152 153 # This comes up in upper case now, but should be lower. 154 # In the future this could be exact case, no need to change. 155 } 156 my $cwd_untainted = $cwd; 157 my $check_t_cwd = 1; 158 $wanted_callback = $wanted->{wanted}; 159 $bydepth = $wanted->{bydepth}; 160 $pre_process = $wanted->{preprocess}; 161 $post_process = $wanted->{postprocess}; 162 $no_chdir = $wanted->{no_chdir}; 163 $full_check = $wanted->{follow}; 164 $follow = $full_check || $wanted->{follow_fast}; 165 $follow_skip = $wanted->{follow_skip}; 166 $untaint = $wanted->{untaint}; 167 $untaint_pat = $wanted->{untaint_pattern}; 168 $untaint_skip = $wanted->{untaint_skip}; 169 $dangling_symlinks = $wanted->{dangling_symlinks}; 170 171 # for compatibility reasons (find.pl, find2perl) 172 local our ($topdir, $topdev, $topino, $topmode, $topnlink); 173 174 # a symbolic link to a directory doesn't increase the link count 175 $avoid_nlink = $follow || $File::Find::dont_use_nlink; 176 177 my ($abs_dir, $Is_Dir); 178 179 Proc_Top_Item: 180 foreach my $TOP (@_) { 181 my $top_item = $TOP; 182 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS; 183 184 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 185 186 if ($Is_Win32) { 187 $top_item =~ s|[/\\]\z|| 188 unless $top_item =~ m{^(?:\w:)?[/\\]$}; 189 } 190 else { 191 $top_item =~ s|/\z|| unless $top_item eq '/'; 192 } 193 194 $Is_Dir= 0; 195 196 if ($follow) { 197 198 if (substr($top_item,0,1) eq '/') { 199 $abs_dir = $top_item; 200 } 201 elsif ($top_item eq $File::Find::current_dir) { 202 $abs_dir = $cwd; 203 } 204 else { # care about any ../ 205 $top_item =~ s/\.dir\z//i if $Is_VMS; 206 $abs_dir = contract_name("$cwd/",$top_item); 207 } 208 $abs_dir= Follow_SymLink($abs_dir); 209 unless (defined $abs_dir) { 210 if ($dangling_symlinks) { 211 if (ref $dangling_symlinks eq 'CODE') { 212 $dangling_symlinks->($top_item, $cwd); 213 } else { 214 warnings::warnif "$top_item is a dangling symbolic link\n"; 215 } 216 } 217 next Proc_Top_Item; 218 } 219 220 if (-d _) { 221 $top_item =~ s/\.dir\z//i if $Is_VMS; 222 _find_dir_symlnk($wanted, $abs_dir, $top_item); 223 $Is_Dir= 1; 224 } 225 } 226 else { # no follow 227 $topdir = $top_item; 228 unless (defined $topnlink) { 229 warnings::warnif "Can't stat $top_item: $!\n"; 230 next Proc_Top_Item; 231 } 232 if (-d _) { 233 $top_item =~ s/\.dir\z//i if $Is_VMS; 234 _find_dir($wanted, $top_item, $topnlink); 235 $Is_Dir= 1; 236 } 237 else { 238 $abs_dir= $top_item; 239 } 240 } 241 242 unless ($Is_Dir) { 243 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { 244 ($dir,$_) = ('./', $top_item); 245 } 246 247 $abs_dir = $dir; 248 if (( $untaint ) && (is_tainted($dir) )) { 249 ( $abs_dir ) = $dir =~ m|$untaint_pat|; 250 unless (defined $abs_dir) { 251 if ($untaint_skip == 0) { 252 die "directory $dir is still tainted"; 253 } 254 else { 255 next Proc_Top_Item; 256 } 257 } 258 } 259 260 unless ($no_chdir || chdir $abs_dir) { 261 warnings::warnif "Couldn't chdir $abs_dir: $!\n"; 262 next Proc_Top_Item; 263 } 264 265 $name = $abs_dir . $_; # $File::Find::name 266 $_ = $name if $no_chdir; 267 268 { $wanted_callback->() }; # protect against wild "next" 269 270 } 271 272 unless ( $no_chdir ) { 273 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { 274 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; 275 unless (defined $cwd_untainted) { 276 die "insecure cwd in find(depth)"; 277 } 278 $check_t_cwd = 0; 279 } 280 unless (chdir $cwd_untainted) { 281 die "Can't cd to $cwd: $!\n"; 282 } 283 } 284 } 285} 286 287# API: 288# $wanted 289# $p_dir : "parent directory" 290# $nlink : what came back from the stat 291# preconditions: 292# chdir (if not no_chdir) to dir 293 294sub _find_dir($$$) { 295 my ($wanted, $p_dir, $nlink) = @_; 296 my ($CdLvl,$Level) = (0,0); 297 my @Stack; 298 my @filenames; 299 my ($subcount,$sub_nlink); 300 my $SE= []; 301 my $dir_name= $p_dir; 302 my $dir_pref; 303 my $dir_rel = $File::Find::current_dir; 304 my $tainted = 0; 305 my $no_nlink; 306 307 if ($Is_Win32) { 308 $dir_pref 309 = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" ); 310 } elsif ($Is_VMS) { 311 312 # VMS is returning trailing .dir on directories 313 # and trailing . on files and symbolic links 314 # in UNIX syntax. 315 # 316 317 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.'; 318 319 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" ); 320 } 321 else { 322 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); 323 } 324 325 local ($dir, $name, $prune, *DIR); 326 327 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { 328 my $udir = $p_dir; 329 if (( $untaint ) && (is_tainted($p_dir) )) { 330 ( $udir ) = $p_dir =~ m|$untaint_pat|; 331 unless (defined $udir) { 332 if ($untaint_skip == 0) { 333 die "directory $p_dir is still tainted"; 334 } 335 else { 336 return; 337 } 338 } 339 } 340 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 341 warnings::warnif "Can't cd to $udir: $!\n"; 342 return; 343 } 344 } 345 346 # push the starting directory 347 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 348 349 while (defined $SE) { 350 unless ($bydepth) { 351 $dir= $p_dir; # $File::Find::dir 352 $name= $dir_name; # $File::Find::name 353 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 354 # prune may happen here 355 $prune= 0; 356 { $wanted_callback->() }; # protect against wild "next" 357 next if $prune; 358 } 359 360 # change to that directory 361 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 362 my $udir= $dir_rel; 363 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { 364 ( $udir ) = $dir_rel =~ m|$untaint_pat|; 365 unless (defined $udir) { 366 if ($untaint_skip == 0) { 367 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; 368 } else { # $untaint_skip == 1 369 next; 370 } 371 } 372 } 373 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 374 warnings::warnif "Can't cd to (" . 375 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; 376 next; 377 } 378 $CdLvl++; 379 } 380 381 $dir= $dir_name; # $File::Find::dir 382 383 # Get the list of files in the current directory. 384 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) { 385 warnings::warnif "Can't opendir($dir_name): $!\n"; 386 next; 387 } 388 @filenames = readdir DIR; 389 closedir(DIR); 390 @filenames = $pre_process->(@filenames) if $pre_process; 391 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; 392 393 # default: use whatever was specified 394 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) 395 $no_nlink = $avoid_nlink; 396 # if dir has wrong nlink count, force switch to slower stat method 397 $no_nlink = 1 if ($nlink < 2); 398 399 if ($nlink == 2 && !$no_nlink) { 400 # This dir has no subdirectories. 401 for my $FN (@filenames) { 402 if ($Is_VMS) { 403 # Big hammer here - Compensate for VMS trailing . and .dir 404 # No win situation until this is changed, but this 405 # will handle the majority of the cases with breaking the fewest 406 407 $FN =~ s/\.dir\z//i; 408 $FN =~ s#\.$## if ($FN ne '.'); 409 } 410 next if $FN =~ $File::Find::skip_pattern; 411 412 $name = $dir_pref . $FN; # $File::Find::name 413 $_ = ($no_chdir ? $name : $FN); # $_ 414 { $wanted_callback->() }; # protect against wild "next" 415 } 416 417 } 418 else { 419 # This dir has subdirectories. 420 $subcount = $nlink - 2; 421 422 # HACK: insert directories at this position, so as to preserve 423 # the user pre-processed ordering of files (thus ensuring 424 # directory traversal is in user sorted order, not at random). 425 my $stack_top = @Stack; 426 427 for my $FN (@filenames) { 428 next if $FN =~ $File::Find::skip_pattern; 429 if ($subcount > 0 || $no_nlink) { 430 # Seen all the subdirs? 431 # check for directoriness. 432 # stat is faster for a file in the current directory 433 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; 434 435 if (-d _) { 436 --$subcount; 437 $FN =~ s/\.dir\z//i if $Is_VMS; 438 # HACK: replace push to preserve dir traversal order 439 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; 440 splice @Stack, $stack_top, 0, 441 [$CdLvl,$dir_name,$FN,$sub_nlink]; 442 } 443 else { 444 $name = $dir_pref . $FN; # $File::Find::name 445 $_= ($no_chdir ? $name : $FN); # $_ 446 { $wanted_callback->() }; # protect against wild "next" 447 } 448 } 449 else { 450 $name = $dir_pref . $FN; # $File::Find::name 451 $_= ($no_chdir ? $name : $FN); # $_ 452 { $wanted_callback->() }; # protect against wild "next" 453 } 454 } 455 } 456 } 457 continue { 458 while ( defined ($SE = pop @Stack) ) { 459 ($Level, $p_dir, $dir_rel, $nlink) = @$SE; 460 if ($CdLvl > $Level && !$no_chdir) { 461 my $tmp; 462 if ($Is_VMS) { 463 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']'; 464 } 465 else { 466 $tmp = join('/',('..') x ($CdLvl-$Level)); 467 } 468 die "Can't cd to $tmp from $dir_name: $!" 469 unless chdir ($tmp); 470 $CdLvl = $Level; 471 } 472 473 if ($Is_Win32) { 474 $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} 475 ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"); 476 $dir_pref = "$dir_name/"; 477 } 478 elsif ($^O eq 'VMS') { 479 if ($p_dir =~ m/[\]>]+$/) { 480 $dir_name = $p_dir; 481 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/; 482 $dir_pref = $dir_name; 483 } 484 else { 485 $dir_name = "$p_dir/$dir_rel"; 486 $dir_pref = "$dir_name/"; 487 } 488 } 489 else { 490 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 491 $dir_pref = "$dir_name/"; 492 } 493 494 if ( $nlink == -2 ) { 495 $name = $dir = $p_dir; # $File::Find::name / dir 496 $_ = $File::Find::current_dir; 497 $post_process->(); # End-of-directory processing 498 } 499 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now 500 $name = $dir_name; 501 if ( substr($name,-2) eq '/.' ) { 502 substr($name, length($name) == 2 ? -1 : -2) = ''; 503 } 504 $dir = $p_dir; 505 $_ = ($no_chdir ? $dir_name : $dir_rel ); 506 if ( substr($_,-2) eq '/.' ) { 507 substr($_, length($_) == 2 ? -1 : -2) = ''; 508 } 509 { $wanted_callback->() }; # protect against wild "next" 510 } 511 else { 512 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 513 last; 514 } 515 } 516 } 517} 518 519 520# API: 521# $wanted 522# $dir_loc : absolute location of a dir 523# $p_dir : "parent directory" 524# preconditions: 525# chdir (if not no_chdir) to dir 526 527sub _find_dir_symlnk($$$) { 528 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory 529 my @Stack; 530 my @filenames; 531 my $new_loc; 532 my $updir_loc = $dir_loc; # untainted parent directory 533 my $SE = []; 534 my $dir_name = $p_dir; 535 my $dir_pref; 536 my $loc_pref; 537 my $dir_rel = $File::Find::current_dir; 538 my $byd_flag; # flag for pending stack entry if $bydepth 539 my $tainted = 0; 540 my $ok = 1; 541 542 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); 543 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); 544 545 local ($dir, $name, $fullname, $prune, *DIR); 546 547 unless ($no_chdir) { 548 # untaint the topdir 549 if (( $untaint ) && (is_tainted($dir_loc) )) { 550 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted 551 # once untainted, $updir_loc is pushed on the stack (as parent directory); 552 # hence, we don't need to untaint the parent directory every time we chdir 553 # to it later 554 unless (defined $updir_loc) { 555 if ($untaint_skip == 0) { 556 die "directory $dir_loc is still tainted"; 557 } 558 else { 559 return; 560 } 561 } 562 } 563 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); 564 unless ($ok) { 565 warnings::warnif "Can't cd to $updir_loc: $!\n"; 566 return; 567 } 568 } 569 570 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; 571 572 while (defined $SE) { 573 574 unless ($bydepth) { 575 # change (back) to parent directory (always untainted) 576 unless ($no_chdir) { 577 unless (chdir $updir_loc) { 578 warnings::warnif "Can't cd to $updir_loc: $!\n"; 579 next; 580 } 581 } 582 $dir= $p_dir; # $File::Find::dir 583 $name= $dir_name; # $File::Find::name 584 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 585 $fullname= $dir_loc; # $File::Find::fullname 586 # prune may happen here 587 $prune= 0; 588 lstat($_); # make sure file tests with '_' work 589 { $wanted_callback->() }; # protect against wild "next" 590 next if $prune; 591 } 592 593 # change to that directory 594 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 595 $updir_loc = $dir_loc; 596 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { 597 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir 598 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; 599 unless (defined $updir_loc) { 600 if ($untaint_skip == 0) { 601 die "directory $dir_loc is still tainted"; 602 } 603 else { 604 next; 605 } 606 } 607 } 608 unless (chdir $updir_loc) { 609 warnings::warnif "Can't cd to $updir_loc: $!\n"; 610 next; 611 } 612 } 613 614 $dir = $dir_name; # $File::Find::dir 615 616 # Get the list of files in the current directory. 617 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { 618 warnings::warnif "Can't opendir($dir_loc): $!\n"; 619 next; 620 } 621 @filenames = readdir DIR; 622 closedir(DIR); 623 624 for my $FN (@filenames) { 625 if ($Is_VMS) { 626 # Big hammer here - Compensate for VMS trailing . and .dir 627 # No win situation until this is changed, but this 628 # will handle the majority of the cases with breaking the fewest. 629 630 $FN =~ s/\.dir\z//i; 631 $FN =~ s#\.$## if ($FN ne '.'); 632 } 633 next if $FN =~ $File::Find::skip_pattern; 634 635 # follow symbolic links / do an lstat 636 $new_loc = Follow_SymLink($loc_pref.$FN); 637 638 # ignore if invalid symlink 639 unless (defined $new_loc) { 640 if (!defined -l _ && $dangling_symlinks) { 641 $fullname = undef; 642 if (ref $dangling_symlinks eq 'CODE') { 643 $dangling_symlinks->($FN, $dir_pref); 644 } else { 645 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n"; 646 } 647 } 648 else { 649 $fullname = $loc_pref . $FN; 650 } 651 $name = $dir_pref . $FN; 652 $_ = ($no_chdir ? $name : $FN); 653 { $wanted_callback->() }; 654 next; 655 } 656 657 if (-d _) { 658 if ($Is_VMS) { 659 $FN =~ s/\.dir\z//i; 660 $FN =~ s#\.$## if ($FN ne '.'); 661 $new_loc =~ s/\.dir\z//i; 662 $new_loc =~ s#\.$## if ($new_loc ne '.'); 663 } 664 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; 665 } 666 else { 667 $fullname = $new_loc; # $File::Find::fullname 668 $name = $dir_pref . $FN; # $File::Find::name 669 $_ = ($no_chdir ? $name : $FN); # $_ 670 { $wanted_callback->() }; # protect against wild "next" 671 } 672 } 673 674 } 675 continue { 676 while (defined($SE = pop @Stack)) { 677 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; 678 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 679 $dir_pref = "$dir_name/"; 680 $loc_pref = "$dir_loc/"; 681 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now 682 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 683 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted 684 warnings::warnif "Can't cd to $updir_loc: $!\n"; 685 next; 686 } 687 } 688 $fullname = $dir_loc; # $File::Find::fullname 689 $name = $dir_name; # $File::Find::name 690 if ( substr($name,-2) eq '/.' ) { 691 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name 692 } 693 $dir = $p_dir; # $File::Find::dir 694 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ 695 if ( substr($_,-2) eq '/.' ) { 696 substr($_, length($_) == 2 ? -1 : -2) = ''; 697 } 698 699 lstat($_); # make sure file tests with '_' work 700 { $wanted_callback->() }; # protect against wild "next" 701 } 702 else { 703 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; 704 last; 705 } 706 } 707 } 708} 709 710 711sub wrap_wanted { 712 my $wanted = shift; 713 if ( ref($wanted) eq 'HASH' ) { 714 # RT #122547 715 my %valid_options = map {$_ => 1} qw( 716 wanted 717 bydepth 718 preprocess 719 postprocess 720 follow 721 follow_fast 722 follow_skip 723 dangling_symlinks 724 no_chdir 725 untaint 726 untaint_pattern 727 untaint_skip 728 ); 729 my @invalid_options = (); 730 for my $v (keys %{$wanted}) { 731 push @invalid_options, $v unless exists $valid_options{$v}; 732 } 733 warn "Invalid option(s): @invalid_options" if @invalid_options; 734 735 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) { 736 die 'no &wanted subroutine given'; 737 } 738 if ( $wanted->{follow} || $wanted->{follow_fast}) { 739 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; 740 } 741 if ( $wanted->{untaint} ) { 742 $wanted->{untaint_pattern} = $File::Find::untaint_pattern 743 unless defined $wanted->{untaint_pattern}; 744 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; 745 } 746 return $wanted; 747 } 748 elsif( ref( $wanted ) eq 'CODE' ) { 749 return { wanted => $wanted }; 750 } 751 else { 752 die 'no &wanted subroutine given'; 753 } 754} 755 756sub find { 757 my $wanted = shift; 758 _find_opt(wrap_wanted($wanted), @_); 759} 760 761sub finddepth { 762 my $wanted = wrap_wanted(shift); 763 $wanted->{bydepth} = 1; 764 _find_opt($wanted, @_); 765} 766 767# default 768$File::Find::skip_pattern = qr/^\.{1,2}\z/; 769$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; 770 771# this _should_ work properly on all platforms 772# where File::Find can be expected to work 773$File::Find::current_dir = File::Spec->curdir || '.'; 774 775$File::Find::dont_use_nlink = 1; 776 777# We need a function that checks if a scalar is tainted. Either use the 778# Scalar::Util module's tainted() function or our (slower) pure Perl 779# fallback is_tainted_pp() 780{ 781 local $@; 782 eval { require Scalar::Util }; 783 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; 784} 785 7861; 787 788__END__ 789 790=head1 NAME 791 792File::Find - Traverse a directory tree. 793 794=head1 SYNOPSIS 795 796 use File::Find; 797 find(\&wanted, @directories_to_search); 798 sub wanted { ... } 799 800 use File::Find; 801 finddepth(\&wanted, @directories_to_search); 802 sub wanted { ... } 803 804 use File::Find; 805 find({ wanted => \&process, follow => 1 }, '.'); 806 807=head1 DESCRIPTION 808 809These are functions for searching through directory trees doing work 810on each file found similar to the Unix I<find> command. File::Find 811exports two functions, C<find> and C<finddepth>. They work similarly 812but have subtle differences. 813 814=over 4 815 816=item B<find> 817 818 find(\&wanted, @directories); 819 find(\%options, @directories); 820 821C<find()> does a depth-first search over the given C<@directories> in 822the order they are given. For each file or directory found, it calls 823the C<&wanted> subroutine. (See below for details on how to use the 824C<&wanted> function). Additionally, for each directory found, it will 825C<chdir()> into that directory and continue the search, invoking the 826C<&wanted> function on each file or subdirectory in the directory. 827 828=item B<finddepth> 829 830 finddepth(\&wanted, @directories); 831 finddepth(\%options, @directories); 832 833C<finddepth()> works just like C<find()> except that it invokes the 834C<&wanted> function for a directory I<after> invoking it for the 835directory's contents. It does a postorder traversal instead of a 836preorder traversal, working from the bottom of the directory tree up 837where C<find()> works from the top of the tree down. 838 839=back 840 841Despite the name of the C<finddepth()> function, both C<find()> and 842C<finddepth()> perform a depth-first search of the directory hierarchy. 843 844=head2 %options 845 846The first argument to C<find()> is either a code reference to your 847C<&wanted> function, or a hash reference describing the operations 848to be performed for each file. The 849code reference is described in L</The wanted function> below. 850 851Here are the possible keys for the hash: 852 853=over 4 854 855=item C<wanted> 856 857The value should be a code reference. This code reference is 858described in L</The wanted function> below. The C<&wanted> subroutine is 859mandatory. 860 861=item C<bydepth> 862 863Reports the name of a directory only AFTER all its entries 864have been reported. Entry point C<finddepth()> is a shortcut for 865specifying C<< { bydepth => 1 } >> in the first argument of C<find()>. 866 867=item C<preprocess> 868 869The value should be a code reference. This code reference is used to 870preprocess the current directory. The name of the currently processed 871directory is in C<$File::Find::dir>. Your preprocessing function is 872called after C<readdir()>, but before the loop that calls the C<wanted()> 873function. It is called with a list of strings (actually file/directory 874names) and is expected to return a list of strings. The code can be 875used to sort the file/directory names alphabetically, numerically, 876or to filter out directory entries based on their name alone. When 877I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op. 878 879=item C<postprocess> 880 881The value should be a code reference. It is invoked just before leaving 882the currently processed directory. It is called in void context with no 883arguments. The name of the current directory is in C<$File::Find::dir>. This 884hook is handy for summarizing a directory, such as calculating its disk 885usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a 886no-op. 887 888=item C<follow> 889 890Causes symbolic links to be followed. Since directory trees with symbolic 891links (followed) may contain files more than once and may even have 892cycles, a hash has to be built up with an entry for each file. 893This might be expensive both in space and time for a large 894directory tree. See L</follow_fast> and L</follow_skip> below. 895If either I<follow> or I<follow_fast> is in effect: 896 897=over 4 898 899=item * 900 901It is guaranteed that an I<lstat> has been called before the user's 902C<wanted()> function is called. This enables fast file checks involving C<_>. 903Note that this guarantee no longer holds if I<follow> or I<follow_fast> 904are not set. 905 906=item * 907 908There is a variable C<$File::Find::fullname> which holds the absolute 909pathname of the file with all symbolic links resolved. If the link is 910a dangling symbolic link, then fullname will be set to C<undef>. 911 912=back 913 914This is a no-op on Win32. 915 916=item C<follow_fast> 917 918This is similar to I<follow> except that it may report some files more 919than once. It does detect cycles, however. Since only symbolic links 920have to be hashed, this is much cheaper both in space and time. If 921processing a file more than once (by the user's C<wanted()> function) 922is worse than just taking time, the option I<follow> should be used. 923 924This is also a no-op on Win32. 925 926=item C<follow_skip> 927 928C<follow_skip==1>, which is the default, causes all files which are 929neither directories nor symbolic links to be ignored if they are about 930to be processed a second time. If a directory or a symbolic link 931are about to be processed a second time, File::Find dies. 932 933C<follow_skip==0> causes File::Find to die if any file is about to be 934processed a second time. 935 936C<follow_skip==2> causes File::Find to ignore any duplicate files and 937directories but to proceed normally otherwise. 938 939=item C<dangling_symlinks> 940 941Specifies what to do with symbolic links whose target doesn't exist. 942If true and a code reference, will be called with the symbolic link 943name and the directory it lives in as arguments. Otherwise, if true 944and warnings are on, a warning of the form C<"symbolic_link_name is a dangling 945symbolic link\n"> will be issued. If false, the dangling symbolic link 946will be silently ignored. 947 948=item C<no_chdir> 949 950Does not C<chdir()> to each directory as it recurses. The C<wanted()> 951function will need to be aware of this, of course. In this case, 952C<$_> will be the same as C<$File::Find::name>. 953 954=item C<untaint> 955 956If find is used in L<taint-mode|perlsec/Taint mode> (-T command line switch or 957if EUID != UID or if EGID != GID), then internally directory names have to be 958untainted before they can be C<chdir>'d to. Therefore they are checked against 959a regular expression I<untaint_pattern>. Note that all names passed to the 960user's C<wanted()> function are still tainted. If this option is used while not 961in taint-mode, C<untaint> is a no-op. 962 963=item C<untaint_pattern> 964 965See above. This should be set using the C<qr> quoting operator. 966The default is set to C<qr|^([-+@\w./]+)$|>. 967Note that the parentheses are vital. 968 969=item C<untaint_skip> 970 971If set, a directory which fails the I<untaint_pattern> is skipped, 972including all its sub-directories. The default is to C<die> in such a case. 973 974=back 975 976=head2 The wanted function 977 978The C<wanted()> function does whatever verifications you want on 979each file and directory. Note that despite its name, the C<wanted()> 980function is a generic callback function, and does B<not> tell 981File::Find if a file is "wanted" or not. In fact, its return value 982is ignored. 983 984The wanted function takes no arguments but rather does its work 985through a collection of variables. 986 987=over 4 988 989=item C<$File::Find::dir> is the current directory name, 990 991=item C<$_> is the current filename within that directory 992 993=item C<$File::Find::name> is the complete pathname to the file. 994 995=back 996 997The above variables have all been localized and may be changed without 998affecting data outside of the wanted function. 999 1000For example, when examining the file F</some/path/foo.ext> you will have: 1001 1002 $File::Find::dir = /some/path/ 1003 $_ = foo.ext 1004 $File::Find::name = /some/path/foo.ext 1005 1006You are chdir()'d to C<$File::Find::dir> when the function is called, 1007unless C<no_chdir> was specified. Note that when changing to 1008directories is in effect, the root directory (F</>) is a somewhat 1009special case inasmuch as the concatenation of C<$File::Find::dir>, 1010C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The 1011table below summarizes all variants: 1012 1013 $File::Find::name $File::Find::dir $_ 1014 default / / . 1015 no_chdir=>0 /etc / etc 1016 /etc/x /etc x 1017 1018 no_chdir=>1 / / / 1019 /etc / /etc 1020 /etc/x /etc /etc/x 1021 1022 1023When C<follow> or C<follow_fast> are in effect, there is 1024also a C<$File::Find::fullname>. The function may set 1025C<$File::Find::prune> to prune the tree unless C<bydepth> was 1026specified. Unless C<follow> or C<follow_fast> is specified, for 1027compatibility reasons (find.pl, find2perl) there are in addition the 1028following globals available: C<$File::Find::topdir>, 1029C<$File::Find::topdev>, C<$File::Find::topino>, 1030C<$File::Find::topmode> and C<$File::Find::topnlink>. 1031 1032This library is useful for the C<find2perl> tool (distributed as part of the 1033App-find2perl CPAN distribution), which when fed, 1034 1035 find2perl / -name .nfs\* -mtime +7 \ 1036 -exec rm -f {} \; -o -fstype nfs -prune 1037 1038produces something like: 1039 1040 sub wanted { 1041 /^\.nfs.*\z/s && 1042 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && 1043 int(-M _) > 7 && 1044 unlink($_) 1045 || 1046 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && 1047 $dev < 0 && 1048 ($File::Find::prune = 1); 1049 } 1050 1051Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical 1052filehandle that caches the information from the preceding 1053C<stat()>, C<lstat()>, or filetest. 1054 1055Here's another interesting wanted function. It will find all symbolic 1056links that don't resolve: 1057 1058 sub wanted { 1059 -l && !-e && print "bogus link: $File::Find::name\n"; 1060 } 1061 1062Note that you may mix directories and (non-directory) files in the list of 1063directories to be searched by the C<wanted()> function. 1064 1065 find(\&wanted, "./foo", "./bar", "./baz/epsilon"); 1066 1067In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be 1068evaluated by C<wanted()>. 1069 1070See also the script C<pfind> on CPAN for a nice application of this 1071module. 1072 1073=head1 WARNINGS 1074 1075If you run your program with the C<-w> switch, or if you use the 1076C<warnings> pragma, File::Find will report warnings for several weird 1077situations. You can disable these warnings by putting the statement 1078 1079 no warnings 'File::Find'; 1080 1081in the appropriate scope. See L<warnings> for more info about lexical 1082warnings. 1083 1084=head1 BUGS AND CAVEATS 1085 1086=over 4 1087 1088=item $dont_use_nlink 1089 1090You can set the variable C<$File::Find::dont_use_nlink> to 0 if you 1091are sure the filesystem you are scanning reflects the number of 1092subdirectories in the parent directory's C<nlink> count. 1093 1094If you do set C<$File::Find::dont_use_nlink> to 0, you may notice an 1095improvement in speed at the risk of not recursing into subdirectories 1096if a filesystem doesn't populate C<nlink> as expected. 1097 1098C<$File::Find::dont_use_nlink> now defaults to 1 on all platforms. 1099 1100=item symlinks 1101 1102Be aware that the option to follow symbolic links can be dangerous. 1103Depending on the structure of the directory tree (including symbolic 1104links to directories) you might traverse a given (physical) directory 1105more than once (only if C<follow_fast> is in effect). 1106Furthermore, deleting or changing files in a symbolically linked directory 1107might cause very unpleasant surprises, since you delete or change files 1108in an unknown directory. 1109 1110=back 1111 1112=head1 HISTORY 1113 1114File::Find used to produce incorrect results if called recursively. 1115During the development of perl 5.8 this bug was fixed. 1116The first fixed version of File::Find was 1.01. 1117 1118=head1 SEE ALSO 1119 1120L<find(1)>, find2perl. 1121 1122=cut 1123