1package File::Find; 2use 5.006; 3use strict; 4use warnings; 5use warnings::register; 6our $VERSION = '1.43'; 7use Exporter 'import'; 8require Cwd; 9 10our @EXPORT = qw(find finddepth); 11 12 13use strict; 14my $Is_VMS = $^O eq 'VMS'; 15my $Is_Win32 = $^O eq 'MSWin32'; 16 17require File::Basename; 18require File::Spec; 19 20# Should ideally be my() not our() but local() currently 21# refuses to operate on lexicals 22 23our %SLnkSeen; 24our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 25 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 26 $pre_process, $post_process, $dangling_symlinks); 27 28sub contract_name { 29 my ($cdir,$fn) = @_; 30 31 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; 32 33 $cdir = substr($cdir,0,rindex($cdir,'/')+1); 34 35 $fn =~ s|^\./||; 36 37 my $abs_name= $cdir . $fn; 38 39 if (substr($fn,0,3) eq '../') { 40 1 while $abs_name =~ s!/[^/]*/\.\./+!/!; 41 } 42 43 return $abs_name; 44} 45 46sub _is_absolute { 47 return $_[0] =~ m|^(?:[A-Za-z]:)?/| if $Is_Win32; 48 return substr($_[0], 0, 1) eq '/'; 49} 50 51sub _is_root { 52 return $_[0] =~ m|^(?:[A-Za-z]:)?/\z| if $Is_Win32; 53 return $_[0] eq '/'; 54} 55 56sub PathCombine($$) { 57 my ($Base,$Name) = @_; 58 my $AbsName; 59 60 if (_is_absolute($Name)) { 61 $AbsName= $Name; 62 } 63 else { 64 $AbsName= contract_name($Base,$Name); 65 } 66 67 # (simple) check for recursion 68 my $newlen= length($AbsName); 69 if ($newlen <= length($Base)) { 70 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') 71 && $AbsName eq substr($Base,0,$newlen)) 72 { 73 return undef; 74 } 75 } 76 return $AbsName; 77} 78 79sub Follow_SymLink($) { 80 my ($AbsName) = @_; 81 82 my ($NewName,$DEV, $INO); 83 ($DEV, $INO)= lstat $AbsName; 84 85 while (-l _) { 86 if ($SLnkSeen{$DEV, $INO}++) { 87 if ($follow_skip < 2) { 88 die "$AbsName is encountered a second time"; 89 } 90 else { 91 return undef; 92 } 93 } 94 my $Link = readlink($AbsName); 95 # canonicalize directory separators 96 $Link =~ s|\\|/|g if $Is_Win32; 97 $NewName= PathCombine($AbsName, $Link); 98 unless(defined $NewName) { 99 if ($follow_skip < 2) { 100 die "$AbsName is a recursive symbolic link"; 101 } 102 else { 103 return undef; 104 } 105 } 106 else { 107 $AbsName= $NewName; 108 } 109 ($DEV, $INO) = lstat($AbsName); 110 return undef unless defined $DEV; # dangling symbolic link 111 } 112 113 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { 114 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { 115 die "$AbsName encountered a second time"; 116 } 117 else { 118 return undef; 119 } 120 } 121 122 return $AbsName; 123} 124 125our($dir, $name, $fullname, $prune); 126sub _find_dir_symlnk($$$); 127sub _find_dir($$$); 128 129# check whether or not a scalar variable is tainted 130# (code straight from the Camel, 3rd ed., page 561) 131sub is_tainted_pp { 132 my $arg = shift; 133 my $nada = substr($arg, 0, 0); # zero-length 134 local $@; 135 eval { eval "# $nada" }; 136 return length($@) != 0; 137} 138 139 140sub _find_opt { 141 my $wanted = shift; 142 return unless @_; 143 die "invalid top directory" unless defined $_[0]; 144 145 # This function must local()ize everything because callbacks may 146 # call find() or finddepth() 147 148 local %SLnkSeen; 149 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 150 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 151 $pre_process, $post_process, $dangling_symlinks); 152 local($dir, $name, $fullname, $prune); 153 local *_ = \my $a; 154 155 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); 156 if ($Is_VMS) { 157 # VMS returns this by default in VMS format which just doesn't 158 # work for the rest of this module. 159 $cwd = VMS::Filespec::unixpath($cwd); 160 161 # Apparently this is not expected to have a trailing space. 162 # To attempt to make VMS/UNIX conversions mostly reversible, 163 # a trailing slash is needed. The run-time functions ignore the 164 # resulting double slash, but it causes the perl tests to fail. 165 $cwd =~ s#/\z##; 166 167 # This comes up in upper case now, but should be lower. 168 # In the future this could be exact case, no need to change. 169 } 170 my $cwd_untainted = $cwd; 171 my $check_t_cwd = 1; 172 $wanted_callback = $wanted->{wanted}; 173 $bydepth = $wanted->{bydepth}; 174 $pre_process = $wanted->{preprocess}; 175 $post_process = $wanted->{postprocess}; 176 $no_chdir = $wanted->{no_chdir}; 177 $full_check = $wanted->{follow}; 178 $follow = $full_check || $wanted->{follow_fast}; 179 $follow_skip = $wanted->{follow_skip}; 180 $untaint = $wanted->{untaint}; 181 $untaint_pat = $wanted->{untaint_pattern}; 182 $untaint_skip = $wanted->{untaint_skip}; 183 $dangling_symlinks = $wanted->{dangling_symlinks}; 184 185 # for compatibility reasons (find.pl, find2perl) 186 local our ($topdir, $topdev, $topino, $topmode, $topnlink); 187 188 # a symbolic link to a directory doesn't increase the link count 189 $avoid_nlink = $follow || $File::Find::dont_use_nlink; 190 191 my ($abs_dir, $Is_Dir); 192 193 Proc_Top_Item: 194 foreach my $TOP (@_) { 195 my $top_item = $TOP; 196 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS; 197 198 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 199 200 # canonicalize directory separators 201 $top_item =~ s|[/\\]|/|g if $Is_Win32; 202 203 # no trailing / unless path is root 204 $top_item =~ s|/\z|| unless _is_root($top_item); 205 206 $Is_Dir= 0; 207 208 if ($follow) { 209 210 if (_is_absolute($top_item)) { 211 $abs_dir = $top_item; 212 } 213 elsif ($top_item eq $File::Find::current_dir) { 214 $abs_dir = $cwd; 215 } 216 else { # care about any ../ 217 $top_item =~ s/\.dir\z//i if $Is_VMS; 218 $abs_dir = contract_name("$cwd/",$top_item); 219 } 220 $abs_dir= Follow_SymLink($abs_dir); 221 unless (defined $abs_dir) { 222 if ($dangling_symlinks) { 223 if (ref $dangling_symlinks eq 'CODE') { 224 $dangling_symlinks->($top_item, $cwd); 225 } else { 226 warnings::warnif "$top_item is a dangling symbolic link\n"; 227 } 228 } 229 next Proc_Top_Item; 230 } 231 232 if (-d _) { 233 $top_item =~ s/\.dir\z//i if $Is_VMS; 234 _find_dir_symlnk($wanted, $abs_dir, $top_item); 235 $Is_Dir= 1; 236 } 237 } 238 else { # no follow 239 $topdir = $top_item; 240 unless (defined $topnlink) { 241 warnings::warnif "Can't stat $top_item: $!\n"; 242 next Proc_Top_Item; 243 } 244 if (-d _) { 245 $top_item =~ s/\.dir\z//i if $Is_VMS; 246 _find_dir($wanted, $top_item, $topnlink); 247 $Is_Dir= 1; 248 } 249 else { 250 $abs_dir= $top_item; 251 } 252 } 253 254 unless ($Is_Dir) { 255 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { 256 ($dir,$_) = ('./', $top_item); 257 } 258 259 $abs_dir = $dir; 260 if (( $untaint ) && (is_tainted($dir) )) { 261 ( $abs_dir ) = $dir =~ m|$untaint_pat|; 262 unless (defined $abs_dir) { 263 if ($untaint_skip == 0) { 264 die "directory $dir is still tainted"; 265 } 266 else { 267 next Proc_Top_Item; 268 } 269 } 270 } 271 272 unless ($no_chdir || chdir $abs_dir) { 273 warnings::warnif "Couldn't chdir $abs_dir: $!\n"; 274 next Proc_Top_Item; 275 } 276 277 $name = $abs_dir . $_; # $File::Find::name 278 $_ = $name if $no_chdir; 279 280 { $wanted_callback->() }; # protect against wild "next" 281 282 } 283 284 unless ( $no_chdir ) { 285 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { 286 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; 287 unless (defined $cwd_untainted) { 288 die "insecure cwd in find(depth)"; 289 } 290 $check_t_cwd = 0; 291 } 292 unless (chdir $cwd_untainted) { 293 die "Can't cd to $cwd: $!\n"; 294 } 295 } 296 } 297} 298 299# API: 300# $wanted 301# $p_dir : "parent directory" 302# $nlink : what came back from the stat 303# preconditions: 304# chdir (if not no_chdir) to dir 305 306sub _find_dir($$$) { 307 my ($wanted, $p_dir, $nlink) = @_; 308 my ($CdLvl,$Level) = (0,0); 309 my @Stack; 310 my @filenames; 311 my ($subcount,$sub_nlink); 312 my $SE= []; 313 my $dir_name= $p_dir; 314 my $dir_pref; 315 my $dir_rel = $File::Find::current_dir; 316 my $tainted = 0; 317 my $no_nlink; 318 319 if ($Is_VMS) { 320 # VMS is returning trailing .dir on directories 321 # and trailing . on files and symbolic links 322 # in UNIX syntax. 323 # 324 325 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.'; 326 327 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" ); 328 } 329 else { 330 $dir_pref = _is_root($p_dir) ? $p_dir : "$p_dir/"; 331 } 332 333 local ($dir, $name, $prune); 334 335 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { 336 my $udir = $p_dir; 337 if (( $untaint ) && (is_tainted($p_dir) )) { 338 ( $udir ) = $p_dir =~ m|$untaint_pat|; 339 unless (defined $udir) { 340 if ($untaint_skip == 0) { 341 die "directory $p_dir is still tainted"; 342 } 343 else { 344 return; 345 } 346 } 347 } 348 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 349 warnings::warnif "Can't cd to $udir: $!\n"; 350 return; 351 } 352 } 353 354 # push the starting directory 355 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 356 357 while (defined $SE) { 358 unless ($bydepth) { 359 $dir= $p_dir; # $File::Find::dir 360 $name= $dir_name; # $File::Find::name 361 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 362 # prune may happen here 363 $prune= 0; 364 { $wanted_callback->() }; # protect against wild "next" 365 next if $prune; 366 } 367 368 # change to that directory 369 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 370 my $udir= $dir_rel; 371 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { 372 ( $udir ) = $dir_rel =~ m|$untaint_pat|; 373 unless (defined $udir) { 374 if ($untaint_skip == 0) { 375 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; 376 } else { # $untaint_skip == 1 377 next; 378 } 379 } 380 } 381 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 382 warnings::warnif "Can't cd to (" . 383 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; 384 next; 385 } 386 $CdLvl++; 387 } 388 389 $dir= $dir_name; # $File::Find::dir 390 391 # Get the list of files in the current directory. 392 my $dh; 393 unless (opendir $dh, ($no_chdir ? $dir_name : $File::Find::current_dir)) { 394 warnings::warnif "Can't opendir($dir_name): $!\n"; 395 next; 396 } 397 @filenames = readdir $dh; 398 closedir($dh); 399 @filenames = $pre_process->(@filenames) if $pre_process; 400 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; 401 402 # default: use whatever was specified 403 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) 404 $no_nlink = $avoid_nlink; 405 # if dir has wrong nlink count, force switch to slower stat method 406 $no_nlink = 1 if ($nlink < 2); 407 408 if ($nlink == 2 && !$no_nlink) { 409 # This dir has no subdirectories. 410 for my $FN (@filenames) { 411 if ($Is_VMS) { 412 # Big hammer here - Compensate for VMS trailing . and .dir 413 # No win situation until this is changed, but this 414 # will handle the majority of the cases with breaking the fewest 415 416 $FN =~ s/\.dir\z//i; 417 $FN =~ s#\.$## if ($FN ne '.'); 418 } 419 next if $FN =~ $File::Find::skip_pattern; 420 421 $name = $dir_pref . $FN; # $File::Find::name 422 $_ = ($no_chdir ? $name : $FN); # $_ 423 { $wanted_callback->() }; # protect against wild "next" 424 } 425 426 } 427 else { 428 # This dir has subdirectories. 429 $subcount = $nlink - 2; 430 431 # HACK: insert directories at this position, so as to preserve 432 # the user pre-processed ordering of files (thus ensuring 433 # directory traversal is in user sorted order, not at random). 434 my $stack_top = @Stack; 435 436 for my $FN (@filenames) { 437 next if $FN =~ $File::Find::skip_pattern; 438 if ($subcount > 0 || $no_nlink) { 439 # Seen all the subdirs? 440 # check for directoriness. 441 # stat is faster for a file in the current directory 442 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; 443 444 if (-d _) { 445 --$subcount; 446 $FN =~ s/\.dir\z//i if $Is_VMS; 447 # HACK: replace push to preserve dir traversal order 448 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; 449 splice @Stack, $stack_top, 0, 450 [$CdLvl,$dir_name,$FN,$sub_nlink]; 451 } 452 else { 453 $name = $dir_pref . $FN; # $File::Find::name 454 $_= ($no_chdir ? $name : $FN); # $_ 455 { $wanted_callback->() }; # protect against wild "next" 456 } 457 } 458 else { 459 $name = $dir_pref . $FN; # $File::Find::name 460 $_= ($no_chdir ? $name : $FN); # $_ 461 { $wanted_callback->() }; # protect against wild "next" 462 } 463 } 464 } 465 } 466 continue { 467 while ( defined ($SE = pop @Stack) ) { 468 ($Level, $p_dir, $dir_rel, $nlink) = @$SE; 469 if ($CdLvl > $Level && !$no_chdir) { 470 my $tmp; 471 if ($Is_VMS) { 472 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']'; 473 } 474 else { 475 $tmp = join('/',('..') x ($CdLvl-$Level)); 476 } 477 die "Can't cd to $tmp from $dir_name: $!" 478 unless chdir ($tmp); 479 $CdLvl = $Level; 480 } 481 482 if ($^O eq 'VMS') { 483 if ($p_dir =~ m/[\]>]+$/) { 484 $dir_name = $p_dir; 485 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/; 486 $dir_pref = $dir_name; 487 } 488 else { 489 $dir_name = "$p_dir/$dir_rel"; 490 $dir_pref = "$dir_name/"; 491 } 492 } 493 else { 494 $dir_name = _is_root($p_dir) ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"; 495 $dir_pref = "$dir_name/"; 496 } 497 498 if ( $nlink == -2 ) { 499 $name = $dir = $p_dir; # $File::Find::name / dir 500 $_ = $File::Find::current_dir; 501 $post_process->(); # End-of-directory processing 502 } 503 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now 504 $name = $dir_name; 505 if ( substr($name,-2) eq '/.' ) { 506 substr($name, length($name) == 2 ? -1 : -2) = ''; 507 } 508 $dir = $p_dir; 509 $_ = ($no_chdir ? $dir_name : $dir_rel ); 510 if ( substr($_,-2) eq '/.' ) { 511 substr($_, length($_) == 2 ? -1 : -2) = ''; 512 } 513 { $wanted_callback->() }; # protect against wild "next" 514 } 515 else { 516 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 517 last; 518 } 519 } 520 } 521} 522 523 524# API: 525# $wanted 526# $dir_loc : absolute location of a dir 527# $p_dir : "parent directory" 528# preconditions: 529# chdir (if not no_chdir) to dir 530 531sub _find_dir_symlnk($$$) { 532 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory 533 my @Stack; 534 my @filenames; 535 my $new_loc; 536 my $updir_loc = $dir_loc; # untainted parent directory 537 my $SE = []; 538 my $dir_name = $p_dir; 539 my $dir_pref; 540 my $loc_pref; 541 my $dir_rel = $File::Find::current_dir; 542 my $byd_flag; # flag for pending stack entry if $bydepth 543 my $tainted = 0; 544 my $ok = 1; 545 546 $dir_pref = _is_root($p_dir) ? $p_dir : "$p_dir/"; 547 $loc_pref = _is_root($dir_loc) ? $dir_loc : "$dir_loc/"; 548 549 local ($dir, $name, $fullname, $prune); 550 551 unless ($no_chdir) { 552 # untaint the topdir 553 if (( $untaint ) && (is_tainted($dir_loc) )) { 554 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted 555 # once untainted, $updir_loc is pushed on the stack (as parent directory); 556 # hence, we don't need to untaint the parent directory every time we chdir 557 # to it later 558 unless (defined $updir_loc) { 559 if ($untaint_skip == 0) { 560 die "directory $dir_loc is still tainted"; 561 } 562 else { 563 return; 564 } 565 } 566 } 567 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); 568 unless ($ok) { 569 warnings::warnif "Can't cd to $updir_loc: $!\n"; 570 return; 571 } 572 } 573 574 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; 575 576 while (defined $SE) { 577 578 unless ($bydepth) { 579 # change (back) to parent directory (always untainted) 580 unless ($no_chdir) { 581 unless (chdir $updir_loc) { 582 warnings::warnif "Can't cd to $updir_loc: $!\n"; 583 next; 584 } 585 } 586 $dir= $p_dir; # $File::Find::dir 587 $name= $dir_name; # $File::Find::name 588 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 589 $fullname= $dir_loc; # $File::Find::fullname 590 # prune may happen here 591 $prune= 0; 592 lstat($_); # make sure file tests with '_' work 593 { $wanted_callback->() }; # protect against wild "next" 594 next if $prune; 595 } 596 597 # change to that directory 598 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 599 $updir_loc = $dir_loc; 600 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { 601 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir 602 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; 603 unless (defined $updir_loc) { 604 if ($untaint_skip == 0) { 605 die "directory $dir_loc is still tainted"; 606 } 607 else { 608 next; 609 } 610 } 611 } 612 unless (chdir $updir_loc) { 613 warnings::warnif "Can't cd to $updir_loc: $!\n"; 614 next; 615 } 616 } 617 618 $dir = $dir_name; # $File::Find::dir 619 620 # Get the list of files in the current directory. 621 my $dh; 622 unless (opendir $dh, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { 623 warnings::warnif "Can't opendir($dir_loc): $!\n"; 624 next; 625 } 626 @filenames = readdir $dh; 627 closedir($dh); 628 629 for my $FN (@filenames) { 630 if ($Is_VMS) { 631 # Big hammer here - Compensate for VMS trailing . and .dir 632 # No win situation until this is changed, but this 633 # will handle the majority of the cases with breaking the fewest. 634 635 $FN =~ s/\.dir\z//i; 636 $FN =~ s#\.$## if ($FN ne '.'); 637 } 638 next if $FN =~ $File::Find::skip_pattern; 639 640 # follow symbolic links / do an lstat 641 $new_loc = Follow_SymLink($loc_pref.$FN); 642 643 # ignore if invalid symlink 644 unless (defined $new_loc) { 645 if (!defined -l _ && $dangling_symlinks) { 646 $fullname = undef; 647 if (ref $dangling_symlinks eq 'CODE') { 648 $dangling_symlinks->($FN, $dir_pref); 649 } else { 650 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n"; 651 } 652 } 653 else { 654 $fullname = $loc_pref . $FN; 655 } 656 $name = $dir_pref . $FN; 657 $_ = ($no_chdir ? $name : $FN); 658 { $wanted_callback->() }; 659 next; 660 } 661 662 if (-d _) { 663 if ($Is_VMS) { 664 $FN =~ s/\.dir\z//i; 665 $FN =~ s#\.$## if ($FN ne '.'); 666 $new_loc =~ s/\.dir\z//i; 667 $new_loc =~ s#\.$## if ($new_loc ne '.'); 668 } 669 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; 670 } 671 else { 672 $fullname = $new_loc; # $File::Find::fullname 673 $name = $dir_pref . $FN; # $File::Find::name 674 $_ = ($no_chdir ? $name : $FN); # $_ 675 { $wanted_callback->() }; # protect against wild "next" 676 } 677 } 678 679 } 680 continue { 681 while (defined($SE = pop @Stack)) { 682 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; 683 $dir_name = _is_root($p_dir) ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"; 684 $dir_pref = "$dir_name/"; 685 $loc_pref = "$dir_loc/"; 686 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now 687 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 688 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted 689 warnings::warnif "Can't cd to $updir_loc: $!\n"; 690 next; 691 } 692 } 693 $fullname = $dir_loc; # $File::Find::fullname 694 $name = $dir_name; # $File::Find::name 695 if ( substr($name,-2) eq '/.' ) { 696 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name 697 } 698 $dir = $p_dir; # $File::Find::dir 699 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ 700 if ( substr($_,-2) eq '/.' ) { 701 substr($_, length($_) == 2 ? -1 : -2) = ''; 702 } 703 704 lstat($_); # make sure file tests with '_' work 705 { $wanted_callback->() }; # protect against wild "next" 706 } 707 else { 708 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; 709 last; 710 } 711 } 712 } 713} 714 715 716sub wrap_wanted { 717 my $wanted = shift; 718 if ( ref($wanted) eq 'HASH' ) { 719 # RT #122547 720 my %valid_options = map {$_ => 1} qw( 721 wanted 722 bydepth 723 preprocess 724 postprocess 725 follow 726 follow_fast 727 follow_skip 728 dangling_symlinks 729 no_chdir 730 untaint 731 untaint_pattern 732 untaint_skip 733 ); 734 my @invalid_options = (); 735 for my $v (keys %{$wanted}) { 736 push @invalid_options, $v unless exists $valid_options{$v}; 737 } 738 warn "Invalid option(s): @invalid_options" if @invalid_options; 739 740 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) { 741 die 'no &wanted subroutine given'; 742 } 743 if ( $wanted->{follow} || $wanted->{follow_fast}) { 744 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; 745 } 746 if ( $wanted->{untaint} ) { 747 $wanted->{untaint_pattern} = $File::Find::untaint_pattern 748 unless defined $wanted->{untaint_pattern}; 749 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; 750 } 751 return $wanted; 752 } 753 elsif( ref( $wanted ) eq 'CODE' ) { 754 return { wanted => $wanted }; 755 } 756 else { 757 die 'no &wanted subroutine given'; 758 } 759} 760 761sub find { 762 my $wanted = shift; 763 _find_opt(wrap_wanted($wanted), @_); 764} 765 766sub finddepth { 767 my $wanted = wrap_wanted(shift); 768 $wanted->{bydepth} = 1; 769 _find_opt($wanted, @_); 770} 771 772# default 773$File::Find::skip_pattern = qr/^\.{1,2}\z/; 774$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; 775 776# this _should_ work properly on all platforms 777# where File::Find can be expected to work 778$File::Find::current_dir = File::Spec->curdir || '.'; 779 780$File::Find::dont_use_nlink = 1; 781 782# We need a function that checks if a scalar is tainted. Either use the 783# Scalar::Util module's tainted() function or our (slower) pure Perl 784# fallback is_tainted_pp() 785{ 786 local $@; 787 eval { require Scalar::Util }; 788 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; 789} 790 7911; 792 793__END__ 794 795=head1 NAME 796 797File::Find - Traverse a directory tree. 798 799=head1 SYNOPSIS 800 801 use File::Find; 802 find(\&wanted, @directories_to_search); 803 sub wanted { ... } 804 805 use File::Find; 806 finddepth(\&wanted, @directories_to_search); 807 sub wanted { ... } 808 809 use File::Find; 810 find({ wanted => \&process, follow => 1 }, '.'); 811 812=head1 DESCRIPTION 813 814These are functions for searching through directory trees doing work 815on each file found similar to the Unix I<find> command. File::Find 816exports two functions, C<find> and C<finddepth>. They work similarly 817but have subtle differences. 818 819=over 4 820 821=item B<find> 822 823 find(\&wanted, @directories); 824 find(\%options, @directories); 825 826C<find()> does a depth-first search over the given C<@directories> in 827the order they are given. For each file or directory found, it calls 828the C<&wanted> subroutine. (See below for details on how to use the 829C<&wanted> function). Additionally, for each directory found, it will 830C<chdir()> into that directory and continue the search, invoking the 831C<&wanted> function on each file or subdirectory in the directory. 832 833=item B<finddepth> 834 835 finddepth(\&wanted, @directories); 836 finddepth(\%options, @directories); 837 838C<finddepth()> works just like C<find()> except that it invokes the 839C<&wanted> function for a directory I<after> invoking it for the 840directory's contents. It does a postorder traversal instead of a 841preorder traversal, working from the bottom of the directory tree up 842where C<find()> works from the top of the tree down. 843 844=back 845 846Despite the name of the C<finddepth()> function, both C<find()> and 847C<finddepth()> perform a depth-first search of the directory hierarchy. 848 849=head2 %options 850 851The first argument to C<find()> is either a code reference to your 852C<&wanted> function, or a hash reference describing the operations 853to be performed for each file. The 854code reference is described in L</The wanted function> below. 855 856Here are the possible keys for the hash: 857 858=over 4 859 860=item C<wanted> 861 862The value should be a code reference. This code reference is 863described in L</The wanted function> below. The C<&wanted> subroutine is 864mandatory. 865 866=item C<bydepth> 867 868Reports the name of a directory only AFTER all its entries 869have been reported. Entry point C<finddepth()> is a shortcut for 870specifying C<< { bydepth => 1 } >> in the first argument of C<find()>. 871 872=item C<preprocess> 873 874The value should be a code reference. This code reference is used to 875preprocess the current directory. The name of the currently processed 876directory is in C<$File::Find::dir>. Your preprocessing function is 877called after C<readdir()>, but before the loop that calls the C<wanted()> 878function. It is called with a list of strings (actually file/directory 879names) and is expected to return a list of strings. The code can be 880used to sort the file/directory names alphabetically, numerically, 881or to filter out directory entries based on their name alone. When 882I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op. 883 884=item C<postprocess> 885 886The value should be a code reference. It is invoked just before leaving 887the currently processed directory. It is called in void context with no 888arguments. The name of the current directory is in C<$File::Find::dir>. This 889hook is handy for summarizing a directory, such as calculating its disk 890usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a 891no-op. 892 893=item C<follow> 894 895Causes symbolic links to be followed. Since directory trees with symbolic 896links (followed) may contain files more than once and may even have 897cycles, a hash has to be built up with an entry for each file. 898This might be expensive both in space and time for a large 899directory tree. See L</follow_fast> and L</follow_skip> below. 900If either I<follow> or I<follow_fast> is in effect: 901 902=over 4 903 904=item * 905 906It is guaranteed that an I<lstat> has been called before the user's 907C<wanted()> function is called. This enables fast file checks involving C<_>. 908Note that this guarantee no longer holds if I<follow> or I<follow_fast> 909are not set. 910 911=item * 912 913There is a variable C<$File::Find::fullname> which holds the absolute 914pathname of the file with all symbolic links resolved. If the link is 915a dangling symbolic link, then fullname will be set to C<undef>. 916 917=back 918 919=item C<follow_fast> 920 921This is similar to I<follow> except that it may report some files more 922than once. It does detect cycles, however. Since only symbolic links 923have to be hashed, this is much cheaper both in space and time. If 924processing a file more than once (by the user's C<wanted()> function) 925is worse than just taking time, the option I<follow> should be used. 926 927=item C<follow_skip> 928 929C<follow_skip==1>, which is the default, causes all files which are 930neither directories nor symbolic links to be ignored if they are about 931to be processed a second time. If a directory or a symbolic link 932are about to be processed a second time, File::Find dies. 933 934C<follow_skip==0> causes File::Find to die if any file is about to be 935processed a second time. 936 937C<follow_skip==2> causes File::Find to ignore any duplicate files and 938directories but to proceed normally otherwise. 939 940=item C<dangling_symlinks> 941 942Specifies what to do with symbolic links whose target doesn't exist. 943If true and a code reference, will be called with the symbolic link 944name and the directory it lives in as arguments. Otherwise, if true 945and warnings are on, a warning of the form C<"symbolic_link_name is a dangling 946symbolic link\n"> will be issued. If false, the dangling symbolic link 947will be silently ignored. 948 949=item C<no_chdir> 950 951Does not C<chdir()> to each directory as it recurses. The C<wanted()> 952function will need to be aware of this, of course. In this case, 953C<$_> will be the same as C<$File::Find::name>. 954 955=item C<untaint> 956 957If find is used in L<taint-mode|perlsec/Taint mode> (-T command line switch or 958if EUID != UID or if EGID != GID), then internally directory names have to be 959untainted before they can be C<chdir>'d to. Therefore they are checked against 960a regular expression I<untaint_pattern>. Note that all names passed to the 961user's C<wanted()> function are still tainted. If this option is used while not 962in taint-mode, C<untaint> is a no-op. 963 964=item C<untaint_pattern> 965 966See above. This should be set using the C<qr> quoting operator. 967The default is set to C<qr|^([-+@\w./]+)$|>. 968Note that the parentheses are vital. 969 970=item C<untaint_skip> 971 972If set, a directory which fails the I<untaint_pattern> is skipped, 973including all its sub-directories. The default is to C<die> in such a case. 974 975=back 976 977=head2 The wanted function 978 979The C<wanted()> function does whatever verifications you want on 980each file and directory. Note that despite its name, the C<wanted()> 981function is a generic callback function, and does B<not> tell 982File::Find if a file is "wanted" or not. In fact, its return value 983is ignored. 984 985The wanted function takes no arguments but rather does its work 986through a collection of variables. 987 988=over 4 989 990=item C<$File::Find::dir> is the current directory name, 991 992=item C<$_> is the current filename within that directory 993 994=item C<$File::Find::name> is the complete pathname to the file. 995 996=back 997 998The above variables have all been localized and may be changed without 999affecting data outside of the wanted function. 1000 1001For example, when examining the file F</some/path/foo.ext> you will have: 1002 1003 $File::Find::dir = /some/path/ 1004 $_ = foo.ext 1005 $File::Find::name = /some/path/foo.ext 1006 1007You are chdir()'d to C<$File::Find::dir> when the function is called, 1008unless C<no_chdir> was specified. Note that when changing to 1009directories is in effect, the root directory (F</>) is a somewhat 1010special case inasmuch as the concatenation of C<$File::Find::dir>, 1011C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The 1012table below summarizes all variants: 1013 1014 $File::Find::name $File::Find::dir $_ 1015 default / / . 1016 no_chdir=>0 /etc / etc 1017 /etc/x /etc x 1018 1019 no_chdir=>1 / / / 1020 /etc / /etc 1021 /etc/x /etc /etc/x 1022 1023 1024When C<follow> or C<follow_fast> are in effect, there is 1025also a C<$File::Find::fullname>. The function may set 1026C<$File::Find::prune> to prune the tree unless C<bydepth> was 1027specified. Unless C<follow> or C<follow_fast> is specified, for 1028compatibility reasons (find.pl, find2perl) there are in addition the 1029following globals available: C<$File::Find::topdir>, 1030C<$File::Find::topdev>, C<$File::Find::topino>, 1031C<$File::Find::topmode> and C<$File::Find::topnlink>. 1032 1033This library is useful for the C<find2perl> tool (distributed as part of the 1034App-find2perl CPAN distribution), which when fed, 1035 1036 find2perl / -name .nfs\* -mtime +7 \ 1037 -exec rm -f {} \; -o -fstype nfs -prune 1038 1039produces something like: 1040 1041 sub wanted { 1042 /^\.nfs.*\z/s && 1043 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && 1044 int(-M _) > 7 && 1045 unlink($_) 1046 || 1047 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && 1048 $dev < 0 && 1049 ($File::Find::prune = 1); 1050 } 1051 1052Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical 1053filehandle that caches the information from the preceding 1054C<stat()>, C<lstat()>, or filetest. 1055 1056Here's another interesting wanted function. It will find all symbolic 1057links that don't resolve: 1058 1059 sub wanted { 1060 -l && !-e && print "bogus link: $File::Find::name\n"; 1061 } 1062 1063Note that you may mix directories and (non-directory) files in the list of 1064directories to be searched by the C<wanted()> function. 1065 1066 find(\&wanted, "./foo", "./bar", "./baz/epsilon"); 1067 1068In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be 1069evaluated by C<wanted()>. 1070 1071See also the script C<pfind> on CPAN for a nice application of this 1072module. 1073 1074=head1 WARNINGS 1075 1076If you run your program with the C<-w> switch, or if you use the 1077C<warnings> pragma, File::Find will report warnings for several weird 1078situations. You can disable these warnings by putting the statement 1079 1080 no warnings 'File::Find'; 1081 1082in the appropriate scope. See L<warnings> for more info about lexical 1083warnings. 1084 1085=head1 BUGS AND CAVEATS 1086 1087=over 4 1088 1089=item $dont_use_nlink 1090 1091You can set the variable C<$File::Find::dont_use_nlink> to 0 if you 1092are sure the filesystem you are scanning reflects the number of 1093subdirectories in the parent directory's C<nlink> count. 1094 1095If you do set C<$File::Find::dont_use_nlink> to 0, you may notice an 1096improvement in speed at the risk of not recursing into subdirectories 1097if a filesystem doesn't populate C<nlink> as expected. 1098 1099C<$File::Find::dont_use_nlink> now defaults to 1 on all platforms. 1100 1101=item symlinks 1102 1103Be aware that the option to follow symbolic links can be dangerous. 1104Depending on the structure of the directory tree (including symbolic 1105links to directories) you might traverse a given (physical) directory 1106more than once (only if C<follow_fast> is in effect). 1107Furthermore, deleting or changing files in a symbolically linked directory 1108might cause very unpleasant surprises, since you delete or change files 1109in an unknown directory. 1110 1111=back 1112 1113=head1 HISTORY 1114 1115File::Find used to produce incorrect results if called recursively. 1116During the development of perl 5.8 this bug was fixed. 1117The first fixed version of File::Find was 1.01. 1118 1119=head1 SEE ALSO 1120 1121L<find(1)>, find2perl. 1122 1123=cut 1124