1package File::Find; 2use 5.006; 3use strict; 4use warnings; 5use warnings::register; 6our $VERSION = '1.37'; 7require Exporter; 8require Cwd; 9 10our @ISA = qw(Exporter); 11our @EXPORT = qw(find finddepth); 12 13 14use strict; 15my $Is_VMS = $^O eq 'VMS'; 16my $Is_Win32 = $^O eq 'MSWin32'; 17 18require File::Basename; 19require File::Spec; 20 21# Should ideally be my() not our() but local() currently 22# refuses to operate on lexicals 23 24our %SLnkSeen; 25our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 26 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 27 $pre_process, $post_process, $dangling_symlinks); 28 29sub contract_name { 30 my ($cdir,$fn) = @_; 31 32 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; 33 34 $cdir = substr($cdir,0,rindex($cdir,'/')+1); 35 36 $fn =~ s|^\./||; 37 38 my $abs_name= $cdir . $fn; 39 40 if (substr($fn,0,3) eq '../') { 41 1 while $abs_name =~ s!/[^/]*/\.\./+!/!; 42 } 43 44 return $abs_name; 45} 46 47sub PathCombine($$) { 48 my ($Base,$Name) = @_; 49 my $AbsName; 50 51 if (substr($Name,0,1) eq '/') { 52 $AbsName= $Name; 53 } 54 else { 55 $AbsName= contract_name($Base,$Name); 56 } 57 58 # (simple) check for recursion 59 my $newlen= length($AbsName); 60 if ($newlen <= length($Base)) { 61 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') 62 && $AbsName eq substr($Base,0,$newlen)) 63 { 64 return undef; 65 } 66 } 67 return $AbsName; 68} 69 70sub Follow_SymLink($) { 71 my ($AbsName) = @_; 72 73 my ($NewName,$DEV, $INO); 74 ($DEV, $INO)= lstat $AbsName; 75 76 while (-l _) { 77 if ($SLnkSeen{$DEV, $INO}++) { 78 if ($follow_skip < 2) { 79 die "$AbsName is encountered a second time"; 80 } 81 else { 82 return undef; 83 } 84 } 85 $NewName= PathCombine($AbsName, readlink($AbsName)); 86 unless(defined $NewName) { 87 if ($follow_skip < 2) { 88 die "$AbsName is a recursive symbolic link"; 89 } 90 else { 91 return undef; 92 } 93 } 94 else { 95 $AbsName= $NewName; 96 } 97 ($DEV, $INO) = lstat($AbsName); 98 return undef unless defined $DEV; # dangling symbolic link 99 } 100 101 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { 102 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { 103 die "$AbsName encountered a second time"; 104 } 105 else { 106 return undef; 107 } 108 } 109 110 return $AbsName; 111} 112 113our($dir, $name, $fullname, $prune); 114sub _find_dir_symlnk($$$); 115sub _find_dir($$$); 116 117# check whether or not a scalar variable is tainted 118# (code straight from the Camel, 3rd ed., page 561) 119sub is_tainted_pp { 120 my $arg = shift; 121 my $nada = substr($arg, 0, 0); # zero-length 122 local $@; 123 eval { eval "# $nada" }; 124 return length($@) != 0; 125} 126 127sub _find_opt { 128 my $wanted = shift; 129 return unless @_; 130 die "invalid top directory" unless defined $_[0]; 131 132 # This function must local()ize everything because callbacks may 133 # call find() or finddepth() 134 135 local %SLnkSeen; 136 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 137 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 138 $pre_process, $post_process, $dangling_symlinks); 139 local($dir, $name, $fullname, $prune); 140 local *_ = \my $a; 141 142 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); 143 if ($Is_VMS) { 144 # VMS returns this by default in VMS format which just doesn't 145 # work for the rest of this module. 146 $cwd = VMS::Filespec::unixpath($cwd); 147 148 # Apparently this is not expected to have a trailing space. 149 # To attempt to make VMS/UNIX conversions mostly reversible, 150 # a trailing slash is needed. The run-time functions ignore the 151 # resulting double slash, but it causes the perl tests to fail. 152 $cwd =~ s#/\z##; 153 154 # This comes up in upper case now, but should be lower. 155 # In the future this could be exact case, no need to change. 156 } 157 my $cwd_untainted = $cwd; 158 my $check_t_cwd = 1; 159 $wanted_callback = $wanted->{wanted}; 160 $bydepth = $wanted->{bydepth}; 161 $pre_process = $wanted->{preprocess}; 162 $post_process = $wanted->{postprocess}; 163 $no_chdir = $wanted->{no_chdir}; 164 $full_check = $Is_Win32 ? 0 : $wanted->{follow}; 165 $follow = $Is_Win32 ? 0 : 166 $full_check || $wanted->{follow_fast}; 167 $follow_skip = $wanted->{follow_skip}; 168 $untaint = $wanted->{untaint}; 169 $untaint_pat = $wanted->{untaint_pattern}; 170 $untaint_skip = $wanted->{untaint_skip}; 171 $dangling_symlinks = $wanted->{dangling_symlinks}; 172 173 # for compatibility reasons (find.pl, find2perl) 174 local our ($topdir, $topdev, $topino, $topmode, $topnlink); 175 176 # a symbolic link to a directory doesn't increase the link count 177 $avoid_nlink = $follow || $File::Find::dont_use_nlink; 178 179 my ($abs_dir, $Is_Dir); 180 181 Proc_Top_Item: 182 foreach my $TOP (@_) { 183 my $top_item = $TOP; 184 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS; 185 186 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 187 188 if ($Is_Win32) { 189 $top_item =~ s|[/\\]\z|| 190 unless $top_item =~ m{^(?:\w:)?[/\\]$}; 191 } 192 else { 193 $top_item =~ s|/\z|| unless $top_item eq '/'; 194 } 195 196 $Is_Dir= 0; 197 198 if ($follow) { 199 200 if (substr($top_item,0,1) eq '/') { 201 $abs_dir = $top_item; 202 } 203 elsif ($top_item eq $File::Find::current_dir) { 204 $abs_dir = $cwd; 205 } 206 else { # care about any ../ 207 $top_item =~ s/\.dir\z//i if $Is_VMS; 208 $abs_dir = contract_name("$cwd/",$top_item); 209 } 210 $abs_dir= Follow_SymLink($abs_dir); 211 unless (defined $abs_dir) { 212 if ($dangling_symlinks) { 213 if (ref $dangling_symlinks eq 'CODE') { 214 $dangling_symlinks->($top_item, $cwd); 215 } else { 216 warnings::warnif "$top_item is a dangling symbolic link\n"; 217 } 218 } 219 next Proc_Top_Item; 220 } 221 222 if (-d _) { 223 $top_item =~ s/\.dir\z//i if $Is_VMS; 224 _find_dir_symlnk($wanted, $abs_dir, $top_item); 225 $Is_Dir= 1; 226 } 227 } 228 else { # no follow 229 $topdir = $top_item; 230 unless (defined $topnlink) { 231 warnings::warnif "Can't stat $top_item: $!\n"; 232 next Proc_Top_Item; 233 } 234 if (-d _) { 235 $top_item =~ s/\.dir\z//i if $Is_VMS; 236 _find_dir($wanted, $top_item, $topnlink); 237 $Is_Dir= 1; 238 } 239 else { 240 $abs_dir= $top_item; 241 } 242 } 243 244 unless ($Is_Dir) { 245 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { 246 ($dir,$_) = ('./', $top_item); 247 } 248 249 $abs_dir = $dir; 250 if (( $untaint ) && (is_tainted($dir) )) { 251 ( $abs_dir ) = $dir =~ m|$untaint_pat|; 252 unless (defined $abs_dir) { 253 if ($untaint_skip == 0) { 254 die "directory $dir is still tainted"; 255 } 256 else { 257 next Proc_Top_Item; 258 } 259 } 260 } 261 262 unless ($no_chdir || chdir $abs_dir) { 263 warnings::warnif "Couldn't chdir $abs_dir: $!\n"; 264 next Proc_Top_Item; 265 } 266 267 $name = $abs_dir . $_; # $File::Find::name 268 $_ = $name if $no_chdir; 269 270 { $wanted_callback->() }; # protect against wild "next" 271 272 } 273 274 unless ( $no_chdir ) { 275 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { 276 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; 277 unless (defined $cwd_untainted) { 278 die "insecure cwd in find(depth)"; 279 } 280 $check_t_cwd = 0; 281 } 282 unless (chdir $cwd_untainted) { 283 die "Can't cd to $cwd: $!\n"; 284 } 285 } 286 } 287} 288 289# API: 290# $wanted 291# $p_dir : "parent directory" 292# $nlink : what came back from the stat 293# preconditions: 294# chdir (if not no_chdir) to dir 295 296sub _find_dir($$$) { 297 my ($wanted, $p_dir, $nlink) = @_; 298 my ($CdLvl,$Level) = (0,0); 299 my @Stack; 300 my @filenames; 301 my ($subcount,$sub_nlink); 302 my $SE= []; 303 my $dir_name= $p_dir; 304 my $dir_pref; 305 my $dir_rel = $File::Find::current_dir; 306 my $tainted = 0; 307 my $no_nlink; 308 309 if ($Is_Win32) { 310 $dir_pref 311 = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" ); 312 } elsif ($Is_VMS) { 313 314 # VMS is returning trailing .dir on directories 315 # and trailing . on files and symbolic links 316 # in UNIX syntax. 317 # 318 319 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.'; 320 321 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" ); 322 } 323 else { 324 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); 325 } 326 327 local ($dir, $name, $prune, *DIR); 328 329 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { 330 my $udir = $p_dir; 331 if (( $untaint ) && (is_tainted($p_dir) )) { 332 ( $udir ) = $p_dir =~ m|$untaint_pat|; 333 unless (defined $udir) { 334 if ($untaint_skip == 0) { 335 die "directory $p_dir is still tainted"; 336 } 337 else { 338 return; 339 } 340 } 341 } 342 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 343 warnings::warnif "Can't cd to $udir: $!\n"; 344 return; 345 } 346 } 347 348 # push the starting directory 349 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 350 351 while (defined $SE) { 352 unless ($bydepth) { 353 $dir= $p_dir; # $File::Find::dir 354 $name= $dir_name; # $File::Find::name 355 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 356 # prune may happen here 357 $prune= 0; 358 { $wanted_callback->() }; # protect against wild "next" 359 next if $prune; 360 } 361 362 # change to that directory 363 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 364 my $udir= $dir_rel; 365 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { 366 ( $udir ) = $dir_rel =~ m|$untaint_pat|; 367 unless (defined $udir) { 368 if ($untaint_skip == 0) { 369 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; 370 } else { # $untaint_skip == 1 371 next; 372 } 373 } 374 } 375 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 376 warnings::warnif "Can't cd to (" . 377 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; 378 next; 379 } 380 $CdLvl++; 381 } 382 383 $dir= $dir_name; # $File::Find::dir 384 385 # Get the list of files in the current directory. 386 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) { 387 warnings::warnif "Can't opendir($dir_name): $!\n"; 388 next; 389 } 390 @filenames = readdir DIR; 391 closedir(DIR); 392 @filenames = $pre_process->(@filenames) if $pre_process; 393 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; 394 395 # default: use whatever was specified 396 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) 397 $no_nlink = $avoid_nlink; 398 # if dir has wrong nlink count, force switch to slower stat method 399 $no_nlink = 1 if ($nlink < 2); 400 401 if ($nlink == 2 && !$no_nlink) { 402 # This dir has no subdirectories. 403 for my $FN (@filenames) { 404 if ($Is_VMS) { 405 # Big hammer here - Compensate for VMS trailing . and .dir 406 # No win situation until this is changed, but this 407 # will handle the majority of the cases with breaking the fewest 408 409 $FN =~ s/\.dir\z//i; 410 $FN =~ s#\.$## if ($FN ne '.'); 411 } 412 next if $FN =~ $File::Find::skip_pattern; 413 414 $name = $dir_pref . $FN; # $File::Find::name 415 $_ = ($no_chdir ? $name : $FN); # $_ 416 { $wanted_callback->() }; # protect against wild "next" 417 } 418 419 } 420 else { 421 # This dir has subdirectories. 422 $subcount = $nlink - 2; 423 424 # HACK: insert directories at this position, so as to preserve 425 # the user pre-processed ordering of files (thus ensuring 426 # directory traversal is in user sorted order, not at random). 427 my $stack_top = @Stack; 428 429 for my $FN (@filenames) { 430 next if $FN =~ $File::Find::skip_pattern; 431 if ($subcount > 0 || $no_nlink) { 432 # Seen all the subdirs? 433 # check for directoriness. 434 # stat is faster for a file in the current directory 435 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; 436 437 if (-d _) { 438 --$subcount; 439 $FN =~ s/\.dir\z//i if $Is_VMS; 440 # HACK: replace push to preserve dir traversal order 441 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; 442 splice @Stack, $stack_top, 0, 443 [$CdLvl,$dir_name,$FN,$sub_nlink]; 444 } 445 else { 446 $name = $dir_pref . $FN; # $File::Find::name 447 $_= ($no_chdir ? $name : $FN); # $_ 448 { $wanted_callback->() }; # protect against wild "next" 449 } 450 } 451 else { 452 $name = $dir_pref . $FN; # $File::Find::name 453 $_= ($no_chdir ? $name : $FN); # $_ 454 { $wanted_callback->() }; # protect against wild "next" 455 } 456 } 457 } 458 } 459 continue { 460 while ( defined ($SE = pop @Stack) ) { 461 ($Level, $p_dir, $dir_rel, $nlink) = @$SE; 462 if ($CdLvl > $Level && !$no_chdir) { 463 my $tmp; 464 if ($Is_VMS) { 465 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']'; 466 } 467 else { 468 $tmp = join('/',('..') x ($CdLvl-$Level)); 469 } 470 die "Can't cd to $tmp from $dir_name: $!" 471 unless chdir ($tmp); 472 $CdLvl = $Level; 473 } 474 475 if ($Is_Win32) { 476 $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} 477 ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"); 478 $dir_pref = "$dir_name/"; 479 } 480 elsif ($^O eq 'VMS') { 481 if ($p_dir =~ m/[\]>]+$/) { 482 $dir_name = $p_dir; 483 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/; 484 $dir_pref = $dir_name; 485 } 486 else { 487 $dir_name = "$p_dir/$dir_rel"; 488 $dir_pref = "$dir_name/"; 489 } 490 } 491 else { 492 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 493 $dir_pref = "$dir_name/"; 494 } 495 496 if ( $nlink == -2 ) { 497 $name = $dir = $p_dir; # $File::Find::name / dir 498 $_ = $File::Find::current_dir; 499 $post_process->(); # End-of-directory processing 500 } 501 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now 502 $name = $dir_name; 503 if ( substr($name,-2) eq '/.' ) { 504 substr($name, length($name) == 2 ? -1 : -2) = ''; 505 } 506 $dir = $p_dir; 507 $_ = ($no_chdir ? $dir_name : $dir_rel ); 508 if ( substr($_,-2) eq '/.' ) { 509 substr($_, length($_) == 2 ? -1 : -2) = ''; 510 } 511 { $wanted_callback->() }; # protect against wild "next" 512 } 513 else { 514 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 515 last; 516 } 517 } 518 } 519} 520 521 522# API: 523# $wanted 524# $dir_loc : absolute location of a dir 525# $p_dir : "parent directory" 526# preconditions: 527# chdir (if not no_chdir) to dir 528 529sub _find_dir_symlnk($$$) { 530 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory 531 my @Stack; 532 my @filenames; 533 my $new_loc; 534 my $updir_loc = $dir_loc; # untainted parent directory 535 my $SE = []; 536 my $dir_name = $p_dir; 537 my $dir_pref; 538 my $loc_pref; 539 my $dir_rel = $File::Find::current_dir; 540 my $byd_flag; # flag for pending stack entry if $bydepth 541 my $tainted = 0; 542 my $ok = 1; 543 544 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); 545 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); 546 547 local ($dir, $name, $fullname, $prune, *DIR); 548 549 unless ($no_chdir) { 550 # untaint the topdir 551 if (( $untaint ) && (is_tainted($dir_loc) )) { 552 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted 553 # once untainted, $updir_loc is pushed on the stack (as parent directory); 554 # hence, we don't need to untaint the parent directory every time we chdir 555 # to it later 556 unless (defined $updir_loc) { 557 if ($untaint_skip == 0) { 558 die "directory $dir_loc is still tainted"; 559 } 560 else { 561 return; 562 } 563 } 564 } 565 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); 566 unless ($ok) { 567 warnings::warnif "Can't cd to $updir_loc: $!\n"; 568 return; 569 } 570 } 571 572 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; 573 574 while (defined $SE) { 575 576 unless ($bydepth) { 577 # change (back) to parent directory (always untainted) 578 unless ($no_chdir) { 579 unless (chdir $updir_loc) { 580 warnings::warnif "Can't cd to $updir_loc: $!\n"; 581 next; 582 } 583 } 584 $dir= $p_dir; # $File::Find::dir 585 $name= $dir_name; # $File::Find::name 586 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 587 $fullname= $dir_loc; # $File::Find::fullname 588 # prune may happen here 589 $prune= 0; 590 lstat($_); # make sure file tests with '_' work 591 { $wanted_callback->() }; # protect against wild "next" 592 next if $prune; 593 } 594 595 # change to that directory 596 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 597 $updir_loc = $dir_loc; 598 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { 599 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir 600 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; 601 unless (defined $updir_loc) { 602 if ($untaint_skip == 0) { 603 die "directory $dir_loc is still tainted"; 604 } 605 else { 606 next; 607 } 608 } 609 } 610 unless (chdir $updir_loc) { 611 warnings::warnif "Can't cd to $updir_loc: $!\n"; 612 next; 613 } 614 } 615 616 $dir = $dir_name; # $File::Find::dir 617 618 # Get the list of files in the current directory. 619 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { 620 warnings::warnif "Can't opendir($dir_loc): $!\n"; 621 next; 622 } 623 @filenames = readdir DIR; 624 closedir(DIR); 625 626 for my $FN (@filenames) { 627 if ($Is_VMS) { 628 # Big hammer here - Compensate for VMS trailing . and .dir 629 # No win situation until this is changed, but this 630 # will handle the majority of the cases with breaking the fewest. 631 632 $FN =~ s/\.dir\z//i; 633 $FN =~ s#\.$## if ($FN ne '.'); 634 } 635 next if $FN =~ $File::Find::skip_pattern; 636 637 # follow symbolic links / do an lstat 638 $new_loc = Follow_SymLink($loc_pref.$FN); 639 640 # ignore if invalid symlink 641 unless (defined $new_loc) { 642 if (!defined -l _ && $dangling_symlinks) { 643 $fullname = undef; 644 if (ref $dangling_symlinks eq 'CODE') { 645 $dangling_symlinks->($FN, $dir_pref); 646 } else { 647 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n"; 648 } 649 } 650 else { 651 $fullname = $loc_pref . $FN; 652 } 653 $name = $dir_pref . $FN; 654 $_ = ($no_chdir ? $name : $FN); 655 { $wanted_callback->() }; 656 next; 657 } 658 659 if (-d _) { 660 if ($Is_VMS) { 661 $FN =~ s/\.dir\z//i; 662 $FN =~ s#\.$## if ($FN ne '.'); 663 $new_loc =~ s/\.dir\z//i; 664 $new_loc =~ s#\.$## if ($new_loc ne '.'); 665 } 666 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; 667 } 668 else { 669 $fullname = $new_loc; # $File::Find::fullname 670 $name = $dir_pref . $FN; # $File::Find::name 671 $_ = ($no_chdir ? $name : $FN); # $_ 672 { $wanted_callback->() }; # protect against wild "next" 673 } 674 } 675 676 } 677 continue { 678 while (defined($SE = pop @Stack)) { 679 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; 680 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 681 $dir_pref = "$dir_name/"; 682 $loc_pref = "$dir_loc/"; 683 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now 684 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 685 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted 686 warnings::warnif "Can't cd to $updir_loc: $!\n"; 687 next; 688 } 689 } 690 $fullname = $dir_loc; # $File::Find::fullname 691 $name = $dir_name; # $File::Find::name 692 if ( substr($name,-2) eq '/.' ) { 693 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name 694 } 695 $dir = $p_dir; # $File::Find::dir 696 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ 697 if ( substr($_,-2) eq '/.' ) { 698 substr($_, length($_) == 2 ? -1 : -2) = ''; 699 } 700 701 lstat($_); # make sure file tests with '_' work 702 { $wanted_callback->() }; # protect against wild "next" 703 } 704 else { 705 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; 706 last; 707 } 708 } 709 } 710} 711 712 713sub wrap_wanted { 714 my $wanted = shift; 715 if ( ref($wanted) eq 'HASH' ) { 716 # RT #122547 717 my %valid_options = map {$_ => 1} qw( 718 wanted 719 bydepth 720 preprocess 721 postprocess 722 follow 723 follow_fast 724 follow_skip 725 dangling_symlinks 726 no_chdir 727 untaint 728 untaint_pattern 729 untaint_skip 730 ); 731 my @invalid_options = (); 732 for my $v (keys %{$wanted}) { 733 push @invalid_options, $v unless exists $valid_options{$v}; 734 } 735 warn "Invalid option(s): @invalid_options" if @invalid_options; 736 737 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) { 738 die 'no &wanted subroutine given'; 739 } 740 if ( $wanted->{follow} || $wanted->{follow_fast}) { 741 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; 742 } 743 if ( $wanted->{untaint} ) { 744 $wanted->{untaint_pattern} = $File::Find::untaint_pattern 745 unless defined $wanted->{untaint_pattern}; 746 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; 747 } 748 return $wanted; 749 } 750 elsif( ref( $wanted ) eq 'CODE' ) { 751 return { wanted => $wanted }; 752 } 753 else { 754 die 'no &wanted subroutine given'; 755 } 756} 757 758sub find { 759 my $wanted = shift; 760 _find_opt(wrap_wanted($wanted), @_); 761} 762 763sub finddepth { 764 my $wanted = wrap_wanted(shift); 765 $wanted->{bydepth} = 1; 766 _find_opt($wanted, @_); 767} 768 769# default 770$File::Find::skip_pattern = qr/^\.{1,2}\z/; 771$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; 772 773# this _should_ work properly on all platforms 774# where File::Find can be expected to work 775$File::Find::current_dir = File::Spec->curdir || '.'; 776 777$File::Find::dont_use_nlink = 1; 778 779# We need a function that checks if a scalar is tainted. Either use the 780# Scalar::Util module's tainted() function or our (slower) pure Perl 781# fallback is_tainted_pp() 782{ 783 local $@; 784 eval { require Scalar::Util }; 785 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; 786} 787 7881; 789 790__END__ 791 792=head1 NAME 793 794File::Find - Traverse a directory tree. 795 796=head1 SYNOPSIS 797 798 use File::Find; 799 find(\&wanted, @directories_to_search); 800 sub wanted { ... } 801 802 use File::Find; 803 finddepth(\&wanted, @directories_to_search); 804 sub wanted { ... } 805 806 use File::Find; 807 find({ wanted => \&process, follow => 1 }, '.'); 808 809=head1 DESCRIPTION 810 811These are functions for searching through directory trees doing work 812on each file found similar to the Unix I<find> command. File::Find 813exports two functions, C<find> and C<finddepth>. They work similarly 814but have subtle differences. 815 816=over 4 817 818=item B<find> 819 820 find(\&wanted, @directories); 821 find(\%options, @directories); 822 823C<find()> does a depth-first search over the given C<@directories> in 824the order they are given. For each file or directory found, it calls 825the C<&wanted> subroutine. (See below for details on how to use the 826C<&wanted> function). Additionally, for each directory found, it will 827C<chdir()> into that directory and continue the search, invoking the 828C<&wanted> function on each file or subdirectory in the directory. 829 830=item B<finddepth> 831 832 finddepth(\&wanted, @directories); 833 finddepth(\%options, @directories); 834 835C<finddepth()> works just like C<find()> except that it invokes the 836C<&wanted> function for a directory I<after> invoking it for the 837directory's contents. It does a postorder traversal instead of a 838preorder traversal, working from the bottom of the directory tree up 839where C<find()> works from the top of the tree down. 840 841=back 842 843=head2 %options 844 845The first argument to C<find()> is either a code reference to your 846C<&wanted> function, or a hash reference describing the operations 847to be performed for each file. The 848code reference is described in L</The wanted function> below. 849 850Here are the possible keys for the hash: 851 852=over 3 853 854=item C<wanted> 855 856The value should be a code reference. This code reference is 857described in L</The wanted function> below. The C<&wanted> subroutine is 858mandatory. 859 860=item C<bydepth> 861 862Reports the name of a directory only AFTER all its entries 863have been reported. Entry point C<finddepth()> is a shortcut for 864specifying C<< { bydepth => 1 } >> in the first argument of C<find()>. 865 866=item C<preprocess> 867 868The value should be a code reference. This code reference is used to 869preprocess the current directory. The name of the currently processed 870directory is in C<$File::Find::dir>. Your preprocessing function is 871called after C<readdir()>, but before the loop that calls the C<wanted()> 872function. It is called with a list of strings (actually file/directory 873names) and is expected to return a list of strings. The code can be 874used to sort the file/directory names alphabetically, numerically, 875or to filter out directory entries based on their name alone. When 876I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op. 877 878=item C<postprocess> 879 880The value should be a code reference. It is invoked just before leaving 881the currently processed directory. It is called in void context with no 882arguments. The name of the current directory is in C<$File::Find::dir>. This 883hook is handy for summarizing a directory, such as calculating its disk 884usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a 885no-op. 886 887=item C<follow> 888 889Causes symbolic links to be followed. Since directory trees with symbolic 890links (followed) may contain files more than once and may even have 891cycles, a hash has to be built up with an entry for each file. 892This might be expensive both in space and time for a large 893directory tree. See L</follow_fast> and L</follow_skip> below. 894If either I<follow> or I<follow_fast> is in effect: 895 896=over 6 897 898=item * 899 900It is guaranteed that an I<lstat> has been called before the user's 901C<wanted()> function is called. This enables fast file checks involving C<_>. 902Note that this guarantee no longer holds if I<follow> or I<follow_fast> 903are not set. 904 905=item * 906 907There is a variable C<$File::Find::fullname> which holds the absolute 908pathname of the file with all symbolic links resolved. If the link is 909a dangling symbolic link, then fullname will be set to C<undef>. 910 911=back 912 913This is a no-op on Win32. 914 915=item C<follow_fast> 916 917This is similar to I<follow> except that it may report some files more 918than once. It does detect cycles, however. Since only symbolic links 919have to be hashed, this is much cheaper both in space and time. If 920processing a file more than once (by the user's C<wanted()> function) 921is worse than just taking time, the option I<follow> should be used. 922 923This is also a no-op on Win32. 924 925=item C<follow_skip> 926 927C<follow_skip==1>, which is the default, causes all files which are 928neither directories nor symbolic links to be ignored if they are about 929to be processed a second time. If a directory or a symbolic link 930are about to be processed a second time, File::Find dies. 931 932C<follow_skip==0> causes File::Find to die if any file is about to be 933processed a second time. 934 935C<follow_skip==2> causes File::Find to ignore any duplicate files and 936directories but to proceed normally otherwise. 937 938=item C<dangling_symlinks> 939 940Specifies what to do with symbolic links whose target doesn't exist. 941If true and a code reference, will be called with the symbolic link 942name and the directory it lives in as arguments. Otherwise, if true 943and warnings are on, a warning of the form C<"symbolic_link_name is a dangling 944symbolic link\n"> will be issued. If false, the dangling symbolic link 945will be silently ignored. 946 947=item C<no_chdir> 948 949Does not C<chdir()> to each directory as it recurses. The C<wanted()> 950function will need to be aware of this, of course. In this case, 951C<$_> will be the same as C<$File::Find::name>. 952 953=item C<untaint> 954 955If find is used in L<taint-mode|perlsec/Taint mode> (-T command line switch or 956if EUID != UID or if EGID != GID), then internally directory names have to be 957untainted before they can be C<chdir>'d to. Therefore they are checked against 958a regular expression I<untaint_pattern>. Note that all names passed to the 959user's C<wanted()> function are still tainted. If this option is used while not 960in taint-mode, C<untaint> is a no-op. 961 962=item C<untaint_pattern> 963 964See above. This should be set using the C<qr> quoting operator. 965The default is set to C<qr|^([-+@\w./]+)$|>. 966Note that the parentheses are vital. 967 968=item C<untaint_skip> 969 970If set, a directory which fails the I<untaint_pattern> is skipped, 971including all its sub-directories. The default is to C<die> in such a case. 972 973=back 974 975=head2 The wanted function 976 977The C<wanted()> function does whatever verifications you want on 978each file and directory. Note that despite its name, the C<wanted()> 979function is a generic callback function, and does B<not> tell 980File::Find if a file is "wanted" or not. In fact, its return value 981is ignored. 982 983The wanted function takes no arguments but rather does its work 984through a collection of variables. 985 986=over 4 987 988=item C<$File::Find::dir> is the current directory name, 989 990=item C<$_> is the current filename within that directory 991 992=item C<$File::Find::name> is the complete pathname to the file. 993 994=back 995 996The above variables have all been localized and may be changed without 997affecting data outside of the wanted function. 998 999For example, when examining the file F</some/path/foo.ext> you will have: 1000 1001 $File::Find::dir = /some/path/ 1002 $_ = foo.ext 1003 $File::Find::name = /some/path/foo.ext 1004 1005You are chdir()'d to C<$File::Find::dir> when the function is called, 1006unless C<no_chdir> was specified. Note that when changing to 1007directories is in effect, the root directory (F</>) is a somewhat 1008special case inasmuch as the concatenation of C<$File::Find::dir>, 1009C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The 1010table below summarizes all variants: 1011 1012 $File::Find::name $File::Find::dir $_ 1013 default / / . 1014 no_chdir=>0 /etc / etc 1015 /etc/x /etc x 1016 1017 no_chdir=>1 / / / 1018 /etc / /etc 1019 /etc/x /etc /etc/x 1020 1021 1022When C<follow> or C<follow_fast> are in effect, there is 1023also a C<$File::Find::fullname>. The function may set 1024C<$File::Find::prune> to prune the tree unless C<bydepth> was 1025specified. Unless C<follow> or C<follow_fast> is specified, for 1026compatibility reasons (find.pl, find2perl) there are in addition the 1027following globals available: C<$File::Find::topdir>, 1028C<$File::Find::topdev>, C<$File::Find::topino>, 1029C<$File::Find::topmode> and C<$File::Find::topnlink>. 1030 1031This library is useful for the C<find2perl> tool (distributed as part of the 1032App-find2perl CPAN distribution), which when fed, 1033 1034 find2perl / -name .nfs\* -mtime +7 \ 1035 -exec rm -f {} \; -o -fstype nfs -prune 1036 1037produces something like: 1038 1039 sub wanted { 1040 /^\.nfs.*\z/s && 1041 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && 1042 int(-M _) > 7 && 1043 unlink($_) 1044 || 1045 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && 1046 $dev < 0 && 1047 ($File::Find::prune = 1); 1048 } 1049 1050Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical 1051filehandle that caches the information from the preceding 1052C<stat()>, C<lstat()>, or filetest. 1053 1054Here's another interesting wanted function. It will find all symbolic 1055links that don't resolve: 1056 1057 sub wanted { 1058 -l && !-e && print "bogus link: $File::Find::name\n"; 1059 } 1060 1061Note that you may mix directories and (non-directory) files in the list of 1062directories to be searched by the C<wanted()> function. 1063 1064 find(\&wanted, "./foo", "./bar", "./baz/epsilon"); 1065 1066In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be 1067evaluated by C<wanted()>. 1068 1069See also the script C<pfind> on CPAN for a nice application of this 1070module. 1071 1072=head1 WARNINGS 1073 1074If you run your program with the C<-w> switch, or if you use the 1075C<warnings> pragma, File::Find will report warnings for several weird 1076situations. You can disable these warnings by putting the statement 1077 1078 no warnings 'File::Find'; 1079 1080in the appropriate scope. See L<warnings> for more info about lexical 1081warnings. 1082 1083=head1 CAVEAT 1084 1085=over 2 1086 1087=item $dont_use_nlink 1088 1089You can set the variable C<$File::Find::dont_use_nlink> to 0 if you 1090are sure the filesystem you are scanning reflects the number of 1091subdirectories in the parent directory's C<nlink> count. 1092 1093If you do set C<$File::Find::dont_use_nlink> to 0, you may notice an 1094improvement in speed at the risk of not recursing into subdirectories 1095if a filesystem doesn't populate C<nlink> as expected. 1096 1097C<$File::Find::dont_use_nlink> now defaults to 1 on all platforms. 1098 1099=item symlinks 1100 1101Be aware that the option to follow symbolic links can be dangerous. 1102Depending on the structure of the directory tree (including symbolic 1103links to directories) you might traverse a given (physical) directory 1104more than once (only if C<follow_fast> is in effect). 1105Furthermore, deleting or changing files in a symbolically linked directory 1106might cause very unpleasant surprises, since you delete or change files 1107in an unknown directory. 1108 1109=back 1110 1111=head1 BUGS AND CAVEATS 1112 1113Despite the name of the C<finddepth()> function, both C<find()> and 1114C<finddepth()> perform a depth-first search of the directory 1115hierarchy. 1116 1117=head1 HISTORY 1118 1119File::Find used to produce incorrect results if called recursively. 1120During the development of perl 5.8 this bug was fixed. 1121The first fixed version of File::Find was 1.01. 1122 1123=head1 SEE ALSO 1124 1125L<find(1)>, find2perl. 1126 1127=cut 1128