#!/usr/local/bin/perl -w
'di';
'ig00';
##############################################################################
##
## search
##
## Jeffrey Friedl (jfriedl@omron.co.jp), Dec 1994.
## Copyright 19.... ah hell, just take it.
##
## BLURB:
## A combo of find and grep -- more or less do a 'grep' on a whole
## directory tree. Fast, with lots of options. Much more powerful than
## the simple "find ... | xargs grep ....". Has a full man page.
## Powerfully customizable.
##
## This file is big, but mostly comments and man page.
##
## See man page for usage info.
## Return value: 2=error, 1=nothing found, 0=something found.
##

$version = "950918.5";
##
## "950918.5";
##   Changed all 'sysread' to 'read' because Linux perl's don't seem
##   to like sysread()
##
## "941227.4";
##   Added -n, -u
##
## "941222.3"
##   Added -nice (due to Lionel Cons <Lionel.Cons@cern.ch>)
##   Removed any leading "./" from name.
##   Added default flags for ~/.search, including TTY, -nice, -list, etc.
##   Program name now has path removed when printed in diagnostics.
##   Added simple tilde-expansion to -dir arg.
##   Added -dskip, etc. Fixed -iregex bug.
##   Changed -dir to be additive, adding -ddir.
##   Now screen out devices, pipes, and sockets.
##   More tidying and lots of expanding of the man page
##
##
## "941217.2";
##   initial release.

## The next line is rewritten by -strip, so keep its exact spelling.
$stripped=0;

&init;

## The startup file lives in the home directory; with no HOME there is
## no file to name, and the built-in defaults get used instead.
$rc_file = exists $ENV{'HOME'} ? join('/', $ENV{'HOME'}, ".search") : "";

&check_args;

## Make sure we've got a regex.
## Don't need one if -find or -showrc was specified.
if ($FIND_ONLY == 0 && $showrc == 0 && @ARGV == 0) {
    $! = 2;
    die "expecting regex arguments.\n";
}

&prepare_to_search($rc_file);

&import_program unless defined &dodir; ## BIG key to speed.

## do search while there are directories to be done.
&dodir(shift(@todo)) while @todo;

&clear_message if $VERBOSE && $STDERR_IS_TTY;
exit($retval);
###############################################################################

##
## Give every option variable its default value.  Runs once, before the
## command line and the startup file are looked at.  Takes no arguments
## and returns nothing useful; everything is set in package globals.
##
sub init
{
    ## initialize variables that might be reset by command-line args
    $DOREP=0;            ## set true by -dorep (redo multi-hardlink files)
    $DOREP=1 if $^O eq 'MSWin32';
    $DO_SORT=0;          ## set by -sort (sort files in a dir before checking)
    $FIND_ONLY=0;        ## set by -find (don't search files)
    $LIST_ONLY=0;        ## set true by -l (list filenames only)
    $NEWER=0;            ## set by -newer, "-mtime -###"
    $NICE=0;             ## set by -nice (print human-readable output)
    $NOLINKS=0;          ## set true by -nolinks (don't follow symlinks)
    $OLDER=0;            ## set by -older, "-mtime ###"
    $PREPEND_FILENAME=1; ## set false by -h (don't prefix lines with filename)
    $REPORT_LINENUM=0;   ## set true by -n (show line numbers)
    $VERBOSE=0;          ## set to a value by -v, -vv, etc. (verbose messages)
    $WHY=0;              ## set true by -why, -vvv+ (report why skipped)
    $XDEV=0;             ## set true by -xdev (stay on one filesystem)
    $all=0;              ## set true by -all (don't skip many kinds of files)
    $iflag = '';         ## set to 'i' by -i (ignore case);
    $norc=0;             ## set by -norc (don't load rc file)
    $showrc=0;           ## set by -showrc (show what happens with rc file)
    $underlineOK=0;      ## set true by -u (watch for underline stuff)
    $words=0;            ## set true by -w (match whole-words only)
    $DELAY=0;            ## inter-file delay (seconds)
    $retval=1;           ## will set to 0 if we find anything.

    ## various elements of stat() that we might access
    ##
    ## NOTE(review): perl documents stat() as returning dev in slot 0 and
    ## ino in slot 1, so the two values below actually select ino and mode.
    ## They are left as found: the -xdev lookup inside &dodir and the keys
    ## of %dir_done / %file_done are built from these same numbers, and
    ## the -xdev test there looks inverted as well.  Changing only one
    ## side makes -xdev skip everything, so correct both places together.
    $STAT_DEV = 1;
    $STAT_INODE = 2;
    $STAT_MTIME = 9;

    $VV_PRINT_COUNT = 50;     ## with -vv, print every VV_PRINT_COUNT files, or...
    $VV_SIZE = 1024*1024;     ## ...every VV_SIZE bytes searched
    $vv_print = $vv_size = 0; ## running totals.

    ## set default options, in case the rc file wants them
    $opt{'TTY'}= 1 if -t STDOUT;

    ## want to know this for debugging message stuff
    $STDERR_IS_TTY = -t STDERR ? 1 : 0;
    $STDERR_SCREWS_STDOUT = ($STDERR_IS_TTY && -t STDOUT) ? 1 : 0;

    $0 =~ s,.*/,,; ## clean up $0 for any diagnostics we'll be printing.
}

##
## Check arguments.
##
## Consumes leading "-something" words from @ARGV and records them in the
## option globals, stopping at -e or at the first word that does not
## start with a dash.  Also called by &read_rc, with @ARGV temporarily
## holding the words of one "option:" line.  Dies (status 2) on a missing
## or bad option argument and on an unknown option.
##
sub check_args
{
    while (@ARGV && $ARGV[0] =~ m/^-/)
    {
        $arg = shift(@ARGV);

        if ($arg eq '-version' || ($VERBOSE && $arg eq '-help')) {
            print qq/Jeffrey's file search, version "$version".\n/;
            exit(0) unless $arg eq '-help';
        }
        if ($arg eq '-help') {
            print <<INLINE_LITERAL_TEXT;
usage: $0 [options] [-e] [PerlRegex ....]
OPTIONS TELLING *WHERE* TO SEARCH:
 -dir DIR start search at the named directory (default is current dir).
 -xdev stay on starting file system.
 -sort sort the files in each directory before processing.
 -nolinks don't follow symbolic links.
OPTIONS TELLING WHICH FILES TO EVEN CONSIDER:
 -mtime # consider files modified > # days ago (-# for < # days old)
 -newer FILE consider files modified more recently than FILE (also -older)
 -name GLOB consider files whose name matches pattern (also -regex).
 -skip GLOB opposite of -name: identifies files to not consider.
 -path GLOB like -name, but for files whose whole path is described.
 -dpath/-dregex/-dskip versions for selecting or pruning directories.
 -all don't skip any files marked to be skipped by the startup file.
 -x<SPECIAL> (see manual, and/or try -showrc).
 -why report why a file isn't checked (also implied by -vvvv).
OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED:
 -f | -find just list files (PerlRegex ignored). Default is to grep them.
 -ff | -ffind Does a faster -find (implies -find -all -dorep)
OPTIONS CONTROLLING HOW THE SEARCH IS DONE (AND WHAT IS PRINTED):
 -l | -list only list files with matches, not the lines themselves.
 -nice | -nnice print more "human readable" output.
 -n prefix each output line with its line number in the file.
 -h don't prefix output lines with file name.
 -u also look "inside" manpage-style underlined text
 -i do case-insensitive searching.
 -w match words only (as defined by perl's \\b).
OTHER OPTIONS:
 -v, -vv, -vvv various levels of message verbosity.
 -e end of options (in case a regex looks like an option).
 -showrc show what the rc file sets, then exit.
 -norc don't load the rc file.
 -dorep check files with multiple hard links multiple times.
INLINE_LITERAL_TEXT
            print "Use -v -help for more verbose help.\n" unless $VERBOSE;
            print "This script file is also a man page.\n" unless $stripped;
            print <<INLINE_LITERAL_TEXT if $VERBOSE;

If -f (or -find) given, PerlRegex is optional and ignored.
Otherwise, will search for files with lines matching any of the given regexes.

Combining things like -name and -mtime implies boolean AND.
However, duplicating things (such as -name '*.c' -name '*.txt') implies OR.

-mtime may be given floating point (i.e. 1.5 is a day and a half).
-iskip/-idskip/-ipath/... etc are case-insensitive versions.

If any letter in -newer/-older is upper case, "or equal" is
inserted into the test.

INLINE_LITERAL_TEXT
            exit(0);
        }
        $DOREP=1,             next if $arg eq '-dorep';   ## do repeats
        $DO_SORT=1,           next if $arg eq '-sort';    ## sort files
        $NOLINKS=1,           next if $arg eq '-nolinks'; ## no sym. links
        $PREPEND_FILENAME=0,  next if $arg eq '-h';       ## no filename prefix
        $REPORT_LINENUM=1,    next if $arg eq '-n';       ## show line numbers
        $WHY=1,               next if $arg eq '-why';     ## tell why skipped
        $XDEV=1,              next if $arg eq '-xdev';    ## don't leave F.S.
        $all=1,$opt{'-all'}=1,next if $arg eq '-all';     ## don't skip *.Z, etc
        $iflag='i',           next if $arg eq '-i';       ## ignore case
        $norc=1,              next if $arg eq '-norc';    ## don't load rc file
        $showrc=1,            next if $arg eq '-showrc';  ## show rc file
        $underlineOK=1,       next if $arg eq '-u';       ## look through underln.
        $words=1,             next if $arg eq '-w';       ## match "words" only
        &strip                     if $arg eq '-strip';   ## dump this program
        last                       if $arg eq '-e';

        ## Anchored at both ends so that only "-delayNNN" itself is taken
        ## here, not any option that merely contains that text.
        $DELAY=$1,            next if $arg =~ m/^-delay(\d+)$/;

        $FIND_ONLY=1,         next if $arg =~/^-f(ind)?$/;## do "find" only

        $FIND_ONLY=1, $DOREP=1, $all=1,
                              next if $arg =~/^-ff(ind)?$/;## fast -find
        $LIST_ONLY=1,$opt{'-list'}=1,
                              next if $arg =~/^-l(ist)?$/;## only list files

        if ($arg =~ m/^-(v+)$/) { ## verbosity
            $VERBOSE =length($1);
            foreach $len (1..$VERBOSE) { $opt{'-'.('v' x $len)}=1 }
            next;
        }
        if ($arg =~ m/^-(n+)ice$/) { ## "nice" output
            $NICE =length($1);
            foreach $len (1..$NICE) { $opt{'-'.('n' x $len).'ice'}=1 }
            next;
        }

        ## -skip / -iskip / -dskip / -idskip: one argument holding a
        ## whitespace-separated list of globs.
        if ($arg =~ m/^-(i?)(d?)skip$/) {
            local($i) = $1 eq 'i';
            local($d) = $2 eq 'd';
            $! = 2, die qq/$0: expecting glob arg to -$arg\n/ unless @ARGV;
            foreach (split(/\s+/, shift @ARGV)) {
                if ($d) {
                    $idskip{$_}=1 if $i;
                    $dskip{$_}=1;
                } else {
                    $iskip{$_}=1 if $i;
                    $skip{$_}=1;
                }
            }
            next;
        }


        ## -name / -path / -regex and their i- and d- variants.  The
        ## option word and the pattern are remembered together as the
        ## key "OPTION,PATTERN"; &prepare_to_search splits them again.
        if ($arg =~ m/^-(i?)(d?)(regex|path|name)$/) {
            local($i) = $1 eq 'i';
            $! = 2, die qq/$0: expecting arg to -$arg\n/ unless @ARGV;
            foreach (split(/\s+/, shift @ARGV)) {
                $iname{join(',', $arg, $_)}=1 if $i;
                $name{join(',', $arg, $_)}=1;
            }
            next;
        }

        ## -dir adds a starting directory; -ddir first forgets the ones
        ## collected so far.
        if ($arg =~ m/^-d?dir$/) {
            $opt{'-dir'}=1;
            $! = 2, die qq/$0: expecting filename arg to -$arg\n/ unless @ARGV;
            $start = shift(@ARGV);
            $start =~ s#^~(/+|$)#$ENV{'HOME'}$1# if defined $ENV{'HOME'};
            $! = 2, die qq/$0: can't find ${arg}'s "$start"\n/ unless -e $start;
            $! = 2, die qq/$0: ${arg}'s "$start" not a directory.\n/ unless -d _;
            undef(@todo), $opt{'-ddir'}=1 if $arg eq '-ddir';
            push(@todo, $start);
            next;
        }

        ## -newer / -older FILE.  Any upper-case letter in the option
        ## makes the comparison "or equal" (no one-second adjustment).
        if ($arg =~ m/^-(new|old)er$/i) {
            $! = 2, die "$0: expecting filename arg to -$arg\n" unless @ARGV;
            local($file, $time) = shift(@ARGV);
            $! = 2, die qq/$0: can't stat -${arg}'s "$file"./
                unless $time = (stat($file))[$STAT_MTIME];
            local($upper) = $arg =~ tr/A-Z//;
            if ($arg =~ m/new/i) {
                $time++ unless $upper;
                $NEWER = $time if $NEWER < $time;
            } else {
                $time-- unless $upper;
                $OLDER = $time if $OLDER == 0 || $OLDER > $time;
            }
            next;
        }

        ## -mtime DAYS.  Anchored so that an option which merely contains
        ## "-mtime" (a -x tag, say) is not swallowed here.
        if ($arg =~ m/^-mtime$/) {
            $! = 2, die "$0: expecting numerical arg to -$arg\n" unless @ARGV;
            local($days) = shift(@ARGV);
            $! = 2, die qq/$0: inappropriate arg ($days) to $arg\n/ if $days==0;
            $days *= 3600 * 24;
            if ($days < 0) {
                local($time) = $^T + $days;
                $NEWER = $time if $NEWER < $time;
            } else {
                local($time) = $^T - $days;
                $OLDER = $time if $OLDER == 0 || $OLDER > $time;
            }
            next;
        }

        ## special user options
        if ($arg =~ m/^-x(.+)/) {
            foreach (split(/[\s,]+/, $1)) { $user_opt{$_} = $opt{$_}= 1; }
            next;
        }

        $! = 2, die "$0: unknown arg [$arg]\n";
    }
}

##
## Given a filename glob, return a regex.
## If the glob has no globbing chars (no * ? or [..]), then
## prepend an effective '*' to it.
##
## The result is returned as a string anchored with ^ and $.  A "*"
## becomes ".*", a "?" becomes "." (exactly one character), a [..] set
## is passed through untouched, and everything else is quoted.
##
sub glob_to_regex
{
    local($glob) = @_;
    local(@parts) = $glob =~ m/\\.|[*?]|\[]?[^]]*]|[^[\\*?]+/g;
    local($trueglob)=0;
    foreach (@parts) {
        if ($_ eq '*') {
            $_ = '.*';
            $trueglob=1;  ## * and ? are a real glob
        } elsif ($_ eq '?') {
            $_ = '.';     ## one character; ".?" would also accept none
            $trueglob=1;
        } elsif (substr($_, 0, 1) eq '[') {
            $trueglob=1;  ## [..] is a real glob
        } else {
            s/^\\//;      ## remove any leading backslash;
            s/\W/\\$&/g;  ## now quote anything dangerous;
        }
    }
    unshift(@parts, '.*') unless $trueglob;
    join('', '^', @parts, '$');
}

##
## Turn everything gathered from the command line and the startup file
## into the strings of perl that &dodir will run: the skip tests, the
## name tests, the magic tests and the regex tests.  Takes the name of
## the startup file.  Exits after reading it when -showrc was given.
##
sub prepare_to_search
{
    local($rc_file) = @_;

    $HEADER_BYTES=0;          ## Might be set nonzero in &read_rc;
    $last_message_length = 0; ## For &message and &clear_message.

    &read_rc($rc_file, $showrc) unless $norc;
    exit(0) if $showrc;

    $NEXT_DIR_ENTRY = $DO_SORT ? 'shift @files' : 'readdir(DIR)';
    $WHY = 1 if $VERBOSE > 3; ## Arg -vvvv or above implies -why.
    @todo = ('.') if @todo == 0; ## Where we'll start looking

    ## see if any user options were specified that weren't accounted for
    foreach $opt (keys %user_opt) {
        next if defined $seen_opt{$opt};
        warn "warning: -x$opt never considered.\n";
    }

    ## An impossible time window is an error, so leave with status 2
    ## like every other fatal diagnostic.
    $! = 2, die "$0: multiple time constraints exclude all possible files.\n"
        if ($NEWER && $OLDER) && ($NEWER > $OLDER);

    ##
    ## Process any -skip/-iskip args that had been given
    ##
    local(@skip_test);
    foreach $glob (keys %skip) {
        $i = defined($iskip{$glob}) ? 'i': '';
        push(@skip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
    }
    if (@skip_test) {
        $SKIP_TEST = join('||',@skip_test);
        $DO_SKIP_TEST = 1;
    } else {
        $DO_SKIP_TEST = $SKIP_TEST = 0;
    }

    ##
    ## Process any -dskip/-idskip args that had been given
    ##
    local(@dskip_test);
    foreach $glob (keys %dskip) {
        $i = defined($idskip{$glob}) ? 'i': '';
        push(@dskip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
    }
    if (@dskip_test) {
        $DSKIP_TEST = join('||',@dskip_test);
        $DO_DSKIP_TEST = 1;
    } else {
        $DO_DSKIP_TEST = $DSKIP_TEST = 0;
    }


    ##
    ## Process any -name, -path, -regex, etc. args that had been given.
    ##
    undef @name_test;
    undef @dname_test;
    foreach $key (keys %name) {
        local($type, $pat) = split(/,/, $key, 2);
        local($i) = defined($iname{$key}) ? 'i' : '';
        if ($type =~ /regex/) {
            $pat =~ s/!/\\!/g;
            $test = "\$name =~ m!^$pat\$!$i";
        } else {
            ## $type is the whole option word ("-name", "-idpath", ...),
            ## so look at how it ends: the -name family is matched against
            ## the bare file name, the -path family against the whole path.
            local($var) = $type =~ m/name$/ ? '$name' : '$file';
            $test = "$var =~ m/". &glob_to_regex($pat). "/$i";
        }
        if ($type =~ m/^-i?d/) {
            push(@dname_test, $test);
        } else {
            push(@name_test, $test);
        }
    }
    if (@name_test) {
        $GLOB_TESTS = join('||', @name_test);

        $DO_GLOB_TESTS = 1;
    } else {
        $GLOB_TESTS = $DO_GLOB_TESTS = 0;
    }
    if (@dname_test) {
        $DGLOB_TESTS = join('||', @dname_test);
        $DO_DGLOB_TESTS = 1;
    } else {
        $DGLOB_TESTS = $DO_DGLOB_TESTS = 0;
    }


    ##
    ## Process any 'magic' things from the startup file.
    ##
    if (@magic_tests && $HEADER_BYTES) {
        ## the $magic' one is for when &dodir is not inlined
        $tests = join('||',@magic_tests);
        $MAGIC_TESTS = " { package magic; \$val = ($tests) }";
        $DO_MAGIC_TESTS = 1;
    } else {
        $MAGIC_TESTS = 1;
        $DO_MAGIC_TESTS = 0;
    }

    ##
    ## Prepare regular expressions.
    ##
    {
        local(@regex_tests);

        ## The flag is pasted into every m/.../ built below, so it must
        ## always hold a string, not only when -l was given.
        $mflag = '';
        if ($LIST_ONLY) {
            ## need to have $* set, but perl5 just won''t shut up about it.
            if ($] >= 5) {
                $mflag = 'm';
            } else {
                eval ' $* = 1 ';
            }
        }

        ##
        ## Until I figure out a better way to deal with it,
        ## We have to worry about a regex like [^xyz] when doing $LIST_ONLY.
        ## Such a regex *will* match \n, and if I'm pulling in multiple
        ## lines, it can allow lines to match that would otherwise not match.
        ##
        ## Therefore, if there is a '[^' in a regex, we can NOT take a chance
        ## and use the fast listonly.
        ##
        $CAN_USE_FAST_LISTONLY = $LIST_ONLY;

        local(@extra);
        ## One optional "underscore, backspace" pair between letters.
        local($underline_glue) = ($] >= 5) ? '(?:_\cH)?' : '(_\cH)?';
        while (@ARGV) {
            $regex = shift(@ARGV);
            ##
            ## If watching for underlined things too, add another regex.
            ##
            if ($underlineOK) {
                if ($regex =~ m/[?*+{}()\\.|^\$[]/) {
                    warn "$0: warning, can't underline-safe '$regex'.\n";
                } else {
                    $regex = join($underline_glue, split(//, $regex));
                }
            }

            ## If nothing special in the regex, just use index...
            ## is quite a bit faster.
            if (($iflag eq '') && ($words == 0) &&
                $regex !~ m/[?*+{}()\\.|^\$[]/)
            {
                push(@regex_tests, "(index(\$_, q+$regex+)>=0)");

            } else {
                $regex =~ s#[\$\@\/]\w#\\$&#;
                if ($words) {
                    if ($regex =~ m/\|/) {
                        ## could be dangerous -- see if we can wrap in parens.
                        if ($regex =~ m/\\\d/) {
                            warn "warning: -w and a | in a regex is dangerous.\n"
                        } else {
                            $regex = join($regex, '(', ')');
                        }
                    }
                    $regex = join($regex, '\b', '\b');
                }
                ## index() says whether the text occurs at all; the other
                ## escapes listed can also match a newline, so they are
                ## kept out of the multi-line scan for the same reason.
                $CAN_USE_FAST_LISTONLY = 0
                    if index($regex, "[^") >= 0 || $regex =~ m/\\[sWDn]/;
                push(@regex_tests, "m/$regex/$iflag$mflag");
            }

            ## If we're done, but still have @extra to do, get set for that.
            if (@ARGV == 0 && @extra) {
                @ARGV = @extra;   ## now deal with the extra stuff.
                $underlineOK = 0; ## but no more of this.
                undef @extra;     ## or this.
            }
        }
        if (@regex_tests) {
            $REGEX_TEST = join('||', @regex_tests);
            ## print STDERR $REGEX_TEST, "\n"; exit;
        } else {
            ## must be doing -find -- just give something syntactically correct.
            $REGEX_TEST = 1;
        }
    }

    ##
    ## Make sure we can read the first item(s).
    ##
    foreach $start (@todo) {
        $! = 2, die qq/$0: can't stat "$start"\n/
            unless ($dev,$inode) = (stat($start))[$STAT_DEV,$STAT_INODE];

        if (defined $dir_done{"$dev,$inode"}) {
            ## ignore the repeat.
            warn(qq/ignoring "$start" (same as "$dir_done{"$dev,$inode"}").\n/)
                if $VERBOSE;
            next;
        }

        ## if -xdev was given, remember the device.
        $xdev{$dev} = 1 if $XDEV;

        ## Note that we won't want to do it again
        $dir_done{"$dev,$inode"} = $start;
    }
}


##
## See the comment above the __END__ above the 'sub dodir' below.
##
## Reads the text of &dodir from the DATA handle, replaces every
## upper-case $VARIABLE in it by that variable's current value, and
## compiles the result.  Exits with status 2 if such a variable is
## undefined or if the text does not compile.
##
sub import_program
{
    sub bad {
        print STDERR "$0: internal error (@_)\n";
        exit 2;
    }

    ## Read from data, up to next __END__. This will be &dodir.
    local($/) = "\n__END__";
    $prog = <DATA>;
    close(DATA);

    $prog =~ s/\beval\b//g; ## remove any 'eval'

    ## Inline uppercase $-variables by their current values.
    if ($] >= 5) {
        $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/
            &bad($1) if !defined ${$main::{$1}}; ${$main::{$1}};/eg;
    } else {
        $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/local(*VAR) = $_main{$1};
            &bad($1) if !defined $VAR; $VAR;/eg;
    }

    eval $prog; ## now do it. This will define &dodir;
    $!=2, die "$0 internal error: $@\n" if $@;
}

###########################################################################

##
## Read the .search file:
## Blank lines and lines that are only #-comments ignored.
## Newlines may be escaped to create long lines
## Other lines are directives.
##
## A directive may begin with an optional tag in the form <...>
## Things inside the <...> are evaluated as with:
##     <(this || that) && must>
## will be true if
##     -xmust -xthis or -xmust -xthat
## were specified on the command line (order doesn't matter, though)
## A directive is not done if there is a tag and it's false.
## Any characters but whitespace and &|()>,! may appear after an -x
## (although "-xdev" is special). -xmust,this is the same as -xmust -xthis.
## Something like -x~ would make <~> true, and <!~> false.
##
## Directives are in the form:
##     option: STRING
##     magic : NUMBYTES : EXPR
##
## With option:
##     The STRING is parsed like a Bourne shell command line, and the
##     options are used as if given on the command line.
##     No comments are allowed on 'option' lines.
##     Examples:
##         # skip objects and libraries
##         option: -skip '.o .a'
##         # skip emacs *~ and *# files, unless -x~ given:
##         <!~> option: -skip '~ #'
##
## With magic:
##     EXPR can be pretty much any perl (comments allowed!).
##     If it evaluates to true for any particular file, it is skipped.
##     The only info you'll have about a file is the variable $H, which
##     will have at least the first NUMBYTES of the file (less if the file
##     is shorter than that, of course, and maybe more). You'll also have
##     any variables you set in previous 'magic' lines.
##     Examples:
##         magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
##         magic: 6 : $x6 eq 'GIF89a'
##
##         magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a' ## old gif \
##                    || $x6 eq 'GIF89a' ## new gif
##     (the above two sets are the same)
##         ## Check the first 32 bytes for "binarish" looking bytes.
##         ## Don't blindly dump on any high-bit set, as non-ASCII text
##         ## often has them set. \x80 and \xff seem to be special, though.
##         ## Require two in a row to not get things like perl's $^T.
##         ## This is known to get *.Z, *.gz, pkzip, *.elc and about any
##         ## executable you'll find.
##         magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
##
## Arguments: the file to read, and a flag that is true for -showrc (each
## directive is then printed, whether or not its tag is satisfied).  If
## the file cannot be opened, a built-in default is used in its place.
## Dies (status 2) on a tag or magic expression that does not compile and
## on an unknown directive.
##
sub read_rc
{
    local($file, $show) = @_;
    local($line_num, $ln, $tag) = 0;
    local($use_default, @default) = 0;

    { package magic; $^W= 0; } ## turn off warnings for when we run EXPR's

    unless (open(RC, '<', $file)) {
        $use_default=1;
        $file = "<internal default startup file>";
        ## no RC file -- use this default.
        @default = split(/\n/,<<'--------INLINE_LITERAL_TEXT');
 magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
 option: -skip '.a .elc .gz .o .pbm .xbm .dvi'
 option: -iskip '.com .exe .lib .pdb .tarz .zip .z .lzh .jpg .jpeg .gif .uu'
 <!~> option: -skip '~ #'
--------INLINE_LITERAL_TEXT
    }

    ##
    ## Make an eval error pretty.
    ##
    sub clean_eval_error {
        local($_) = @_;
        s/ in file \(eval\) at line \d+,//g; ## perl4-style error
        s/ at \(eval \d+\) line \d+,//g;     ## perl5-style error
        $_ = $` if m/\n/;                    ## remove all but first line
        "$_\n";
    }

    print "reading RC file: $file\n" if $show;

    while (defined($_ = ($use_default ? shift(@default) : <RC>))) {
        $ln = ++$line_num;                          ## note starting line num.
        $_ .= <RC>, $line_num++ while s/\\\n?$/\n/; ## allow continuations
        next if /^\s*(#.*)?$/;          ## skip blank or comment-only lines.
        $do = '';

        ## look for an initial <...> tag.
        if (s/^\s*<([^>]*)>//) {
            ## This simple s// will make the tag ready to eval.
            ($tag = $msg = $1) =~
                s/[^\s&|(!)]+/
                    $seen_opt{$&}=1; ## note seen option
                    "defined(\$opt{q>$&>})" ## (q>> is safe quoting here)
                /eg;

            ## see if the tag is true or not, abort this line if not.
            $dothis = (eval $tag);
            $!=2, die "$file $ln <$msg>: $_".&clean_eval_error($@) if $@;

            if ($show) {
                ## Spell the tag out in words.  Every option name in it
                ## gets its -x prefix, not just the first one.
                $msg =~ s/[^\s&|(!)]+/-x$&/g;
                $msg =~ s/\s*!\s*/ no /g;
                $msg =~ s/\s*&&\s*/ and /g;
                $msg =~ s/\s*\|\|\s*/ or /g;
                $msg =~ s/^\s+//; $msg =~ s/\s+$//;
                $do = $dothis ? "(doing because $msg)" :
                                "(do if $msg)";
            } elsif (!$dothis) {
                next;
            }
        }

        if (m/^\s*option\s*:\s*/) {
            next if $all && !$show; ## -all turns off these checks;
            local($_) = $';
            s/\n$//;
            local($orig) = $_;
            print " $do option: $_\n" if $show;
            local($0) = "$0 ($file)"; ## for any error message.
            local(@ARGV);
            local($this);
            ##
            ## Parse $_ as a Bourne shell line -- fill @ARGV
            ##
            while (length) {
                if (s/^\s+//) {
                    push(@ARGV, $this) if defined $this;
                    undef $this;
                    next;
                }
                $this = '' if !defined $this;
                $this .= $1 while s/^'([^']*)'// ||
                                  s/^"([^"]*)"// ||
                                  s/^([^'"\s\\]+)//||
                                  s/^(\\[\D\d])//;
                die "$file $ln: error parsing $orig at $_\n" if m/^\S/;
            }
            push(@ARGV, $this) if defined $this;
            &check_args;
            die qq/$file $ln: unused arg "@ARGV".\n/ if @ARGV;
            next;
        }

        if (m/^\s*magic\s*:\s*(\d+)\s*:\s*/) {
            next if $all && !$show; ## -all turns off these checks;
            local($bytes, $check) = ($1, $');

            if ($show) {
                $check =~ s/\n?$/\n/;
                print " $do contents: $check";
            }
            ## Check to make sure the thing at least compiles.
            eval "package magic; (\$H = '1'x \$main'bytes) && (\n$check\n)\n";
            $! = 2, die "$file $ln: ".&clean_eval_error($@) if $@;

            $HEADER_BYTES = $bytes if $bytes > $HEADER_BYTES;
            push(@magic_tests, "(\n$check\n)");
            next;
        }
        $! = 2, die "$file $ln: unknown command\n";
    }
    ## The handle was never opened when the built-in default was used.
    close(RC) unless $use_default;
}

##
## Show one progress message on stderr.  On a terminal the text is
## followed by a carriage return and padded with blanks so that it
## overwrites the previous message; otherwise it gets a line of its own.
##
sub message
{
    if (!$STDERR_IS_TTY) {
        print STDERR $_[0], "\n";
    } else {
        local($text) = @_;
        $thislength = length($text);
        if ($thislength >= $last_message_length) {
            print STDERR $text, "\r";
        } else {
            print STDERR $text, ' 'x ($last_message_length-$thislength),"\r";
        }
        $last_message_length = $thislength;
    }
}

##
## Blank out whatever &message last left on the terminal line and reset
## the progress counters.
##
sub clear_message
{
    print STDERR ' ' x $last_message_length, "\r" if $last_message_length;
    $vv_print = $vv_size = $last_message_length = 0;
}

##
## Output a copy of this program with comments, extra whitespace, and
## the trailing man page removed. On an ultra slow machine, such a copy
## might load faster (but I can't tell any difference on my machine).
##
## Re-reads this very file through the DATA handle from the top.  Lines
## belonging to a here-document are copied untouched; every other line
## loses its double-hash comment and its leading and trailing blanks.
## Output stops at the ".00;" line that introduces the man page, and the
## process then exits with status 0.
##
sub strip {
    seek(DATA, 0, 0) || die "$0: can't reset internal pointer.\n";
    while(<DATA>) {
        print, next if /INLINE_LITERAL_TEXT/.../INLINE_LITERAL_TEXT/;
        ## must mention INLINE_LITERAL_TEXT on this line!

        ## remove cruft.  One step per kind of cruft: a single
        ## alternation would take out only whichever kind came first
        ## and leave the newline behind, so none of the exact
        ## comparisons below could ever succeed.
        s/\#\#.*//;
        s/^\s+//;
        s/\s+$//;
        last if $_ eq '.00;';
        ## the nroff trick lines at the top carry a semicolon.
        next if ($_ eq '') || m/^'(di|ig00)';?$/;
        s/\$stripped=0;/\$stripped=1;/;
        s/\s\s+/ /; ## squish multiple whitespaces down to one.
        print $_, "\n";
    }
    exit(0);
}

##
## Just to shut up -w. Never executed.
##
sub dummy {

    1 || &dummy || &dir_done || &bad || &message || $NEXT_DIR_ENTRY ||
    $DELAY || $VV_SIZE || $VV_PRINT_COUNT || $STDERR_SCREWS_STDOUT ||
    @files || @files || $magic'H || $magic'H || $xdev{''} || &clear_message;

}

##
## If the following __END__ is in place, what follows will be
## inlined when the program first starts up. Any $ variable name
## all in upper case, specifically, any string matching
##     \$([A-Z][A-Z0-9_]{2,}\b
## will have the true value for that variable inlined. Also, any 'eval' is
## removed
##
## The idea is that when the whole thing is then eval'ed to define &dodir,
## the perl optimizer will make all the decisions that are based upon
## command-line options (such as $VERBOSE), since they'll be inlined as
## constants
##
## Also, and here's the big win, the tests for matching the regex, and a
## few others, are all inlined. Should be blinding speed here.
##
## See the read from <DATA> above for where all this takes place.
## But all-in-all, you *want* the __END__ here. Comment it out only for
## debugging....
##

__END__

##
## Given a directory, check all "appropriate" files in it.
## Shove any subdirectories into the global @todo, so they'll be done
## later.
##
## Be careful about adding any upper-case variables, as they are subject
## to being inlined.
## See comments above the __END__ above.
##
## Takes the name of one directory.  Each entry in it is run through the
## link, type, readability, size, device, time, name and magic checks;
## files that pass are listed (-find) or searched, and directories that
## pass are put on the front of @todo.  Nothing is returned.
##
sub dodir
{
    local($dir) = @_;
    $dir =~ s,/+$,,; ## remove any trailing slash.
    unless (opendir(DIR, "$dir/.")) {
        &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
        warn qq($0: can't opendir "$dir/".\n);
        return;
    }

    if ($VERBOSE) {
        &message($dir);
        $vv_print = $vv_size = 0;
    }

    @files = sort readdir(DIR) if $DO_SORT;

    while (defined($name = eval $NEXT_DIR_ENTRY))
    {
        next if $name eq '.' || $name eq '..'; ## never follow these.

        ## create full relative pathname.
        $file = $dir eq '.' ? $name : "$dir/$name";

        ## if link and skipping them, do so.
        if ($NOLINKS && -l $file) {
            warn qq/skip (symlink): $file\n/ if $WHY;
            next;
        }

        ## skip things unless files or directories
        unless (-f $file || -d _) {
            if ($WHY) {
                $why = (-S _ && "socket") ||
                       (-p _ && "pipe") ||
                       (-b _ && "block special")||
                       (-c _ && "char special") || "somekinda special";
                warn qq/skip ($why): $file\n/;
            }
            next;
        }

        ## skip things we can't read
        unless (-r _) {
            if ($WHY) {
                $why = (-l $file) ? "follow" : "read";
                warn qq/skip (can't $why): $file\n/;
            }
            next;
        }

        ## skip things that are empty
        unless (-s _ || -d _) {
            warn qq/skip (empty): $file\n/ if $WHY;
            next;
        }

        ## Note file device & inode. If -xdev, skip if appropriate.
        ##
        ## NOTE(review): this test looks inverted.  The xdev table holds
        ## the devices of the starting directories, so staying on the
        ## starting file system would mean skipping entries whose device
        ## is *not* in the table.  It is left as found because the stat
        ## slot numbers chosen in &init (1 and 2, where stat documents
        ## dev=0 and ino=1) feed both this lookup and the table itself;
        ## the two places need to be corrected together.
        ($dev, $inode) = (stat(_))[$STAT_DEV, $STAT_INODE];
        if ($XDEV && defined $xdev{$dev}) {
            warn qq/skip (other device): $file\n/ if $WHY;
            next;
        }
        $id = "$dev,$inode";

        ## special work for a directory
        if (-d _) {
            ## Do checks for directory file endings.
            if ($DO_DSKIP_TEST && (eval $DSKIP_TEST)) {
                warn qq/skip (-dskip): $file\n/ if $WHY;
                next;
            }
            ## do checks for -name/-regex/-path tests
            if ($DO_DGLOB_TESTS && !(eval $DGLOB_TESTS)) {
                warn qq/skip (dirname): $file\n/ if $WHY;
                next;
            }

            ## _never_ redo a directory
            if (defined $dir_done{$id} and $^O ne 'MSWin32') {
                warn qq/skip (did as "$dir_done{$id}"): $file\n/ if $WHY;
                next;
            }
            $dir_done{$id} = $file; ## mark it done.
            unshift(@todo, $file);  ## add to the list to do.
            next;
        }
        if ($WHY == 0 && $VERBOSE > 1) {
            if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
                &message($file);
                $vv_print = $vv_size = 0;
            }
        }

        ## do time-related tests
        if ($NEWER || $OLDER) {
            $_ = (stat(_))[$STAT_MTIME];
            if ($NEWER && $_ < $NEWER) {
                warn qq/skip (too old): $file\n/ if $WHY;
                next;
            }
            if ($OLDER && $_ > $OLDER) {
                warn qq/skip (too new): $file\n/ if $WHY;
                next;
            }
        }

        ## do checks for file endings
        if ($DO_SKIP_TEST && (eval $SKIP_TEST)) {
            warn qq/skip (-skip): $file\n/ if $WHY;
            next;
        }

        ## do checks for -name/-regex/-path tests
        if ($DO_GLOB_TESTS && !(eval $GLOB_TESTS)) {
            warn qq/skip (filename): $file\n/ if $WHY;
            next;
        }


        ## If we're not repeating files,
        ## skip this one if we've done it, or note we're doing it.
        unless ($DOREP) {
            if (defined $file_done{$id}) {
                warn qq/skip (did as "$file_done{$id}"): $file\n/ if $WHY;
                next;
            }
            $file_done{$id} = $file;
        }

        if ($DO_MAGIC_TESTS) {
            if (!open(FILE_IN, '<', $file)) {
                &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
                warn qq/$0: can't open: $file\n/;
                next;
            }
            unless (read(FILE_IN, $magic'H, $HEADER_BYTES)) {
                &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
                ## the message ends at the newline, so warn adds nothing.
                warn qq/$0: can't read from "$file"\n/;
                close(FILE_IN);
                next;
            }

            eval $MAGIC_TESTS;
            if ($magic'val) {
                close(FILE_IN);
                warn qq/skip (magic): $file\n/ if $WHY;
                next;
            }
            seek(FILE_IN, 0, 0); ## reset for later <FILE_IN>
        }

        if ($WHY != 0 && $VERBOSE > 1) {
            if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
                &message($file);
                $vv_print = $vv_size = 0;
            }
        }

        if ($DELAY) {
            sleep($DELAY);
        }

        if ($FIND_ONLY) {
            &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
            print $file, "\n";
            $retval=0; ## we've found something
            close(FILE_IN) if $DO_MAGIC_TESTS;
            next;
        } else {
            ## if we weren't doing magic tests, file won't be open yet...
            if (!$DO_MAGIC_TESTS && !open(FILE_IN, '<', $file)) {
                &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
                warn qq/$0: can't open: $file\n/;
                next;
            }
            if ($LIST_ONLY && $CAN_USE_FAST_LISTONLY) {
                ##
                ## This is rather complex, but buys us a LOT when we're just
                ## listing files and not the individual internal lines.
                ##
                local($size) = 4096; ## block-size in which to do reads
                local($nl);          ## will point to $_'s ending newline.
                local($read);        ## will be how many bytes read.
                local($_) = '';      ## Starts out empty
                local($hold);        ## (see below)

                while (($read = read(FILE_IN,$_,$size,length($_)))||length($_))
                {
                    undef @parts;
                    ## if read a full block, but no newline, need to read more.
                    while ($read == $size && ($nl = rindex($_, "\n")) < 0) {
                        push(@parts, $_);                 ## save that part
                        $read = read(FILE_IN, $_, $size); ## keep trying
                    }

                    ##
                    ## If we had to save parts, must now combine them together.
                    ## adjusting $nl to reflect the now-larger $_. This should
                    ## be a lot more efficient than using any kind of .= in the
                    ## loop above.
                    ##
                    if (@parts) {
                        local($lastlen) = length($_); #only need if $nl >= 0
                        $_ = join('', @parts, $_);
                        $nl = length($_) - ($lastlen - $nl) if $nl >= 0;
                    }

                    ##
                    ## If we're at the end of the file, then we can use $_ as
                    ## is. Otherwise, we need to remove the final partial-line
                    ## and save it so that it'll be at the beginning of the
                    ## next read (where the rest of the line will be layed in
                    ## right after it). $hold will be what we should save
                    ## until next time.
                    ##
                    if ($read != $size || $nl < 0) {
                        $hold = '';
                    } else {
                        $hold = substr($_, $nl + 1);
                        substr($_, $nl + 1) = '';
                    }

                    ##
                    ## Now have a bunch of full lines in $_. Use it.
                    ##
                    if (eval $REGEX_TEST) {
                        &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
                        print $file, "\n";
                        $retval=0; ## we've found something

                        last;
                    }

                    ## Prepare for next read....
                    $_ = $hold;
                }

            } else { ## else not using faster block scanning.....

                $lines_printed = 0 if $NICE;
                while (<FILE_IN>) {
                    study;
                    next unless (eval $REGEX_TEST);

                    ##
                    ## We found a matching line.
                    ##
                    $retval=0;
                    &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
                    if ($LIST_ONLY) {
                        print $file, "\n";
                        last;
                    } else {
                        ## prepare to print line.
                        if ($NICE && $lines_printed++ == 0) {
                            print '-' x 70, "\n" if $NICE > 1;
                            print $file, ":\n";
                        }

                        ##
                        ## Print all the prelim stuff. This looks less efficient
                        ## than it needs to be, but that's so that when the eval
                        ## is compiled (and the tests are optimized away), the
                        ## result will be less actual PRINTs than the more natural
                        ## way of doing these tests....
                        ##
                        if ($NICE) {
                            if ($REPORT_LINENUM) {
                                print " line $.: ";
                            } else {
                                print " ";
                            }
                        } elsif ($REPORT_LINENUM && $PREPEND_FILENAME) {
                            print "$file,:$.: ";
                        } elsif ($PREPEND_FILENAME) {
                            print "$file: ";
                        } elsif ($REPORT_LINENUM) {
                            print "$.: ";
                        }
                        print $_;
                        print "\n" unless m/\n$/;
                    }
                }
                print "\n" if ($NICE > 1) && $lines_printed;
            }
            close(FILE_IN);
        }
    }
    closedir(DIR);
}

__END__
.00; ## finish .ig

'di \" finish diversion--previous line must be blank
.nr nl 0-1 \" fake up transition to first page again
.nr % 0 \" start at page 1
.\"__________________NORMAL_MAN_PAGE_BELOW_________________
.ll+10n
.TH search 1 "Dec 17, 1994"
.SH SEARCH
search \- search files (a'la grep) in a whole directory tree.
.SH SYNOPSIS
search [ grep-like and find-like options] [regex ....]
.SH DESCRIPTION
.I Search
is more or less a combo of 'find' and 'grep' (although the regular
expression flavor is that of the perl being used, which is closer to
egrep's than grep's).

.I Search
does generally the same kind of thing that
.nf
 find <blah blah> | xargs egrep <blah blah>
.fi
does, but is
.I much
more powerful and efficient (and intuitive, I think).

This manual describes
.I search
as of version "941227.4".

.SH "QUICK EXAMPLE"
Basic use is simple:
.nf
 % search jeff
.fi
will search files in the current directory, and all sub directories, for
files that have "jeff" in them. The lines will be listed with the
containing file's name prepended.
1154.PP 1155If you list more than one regex, such as with 1156.nf 1157 % search jeff Larry Randal+ 'Stoc?k' 'C.*son' 1158.fi 1159then a line containing any of the regexes will be listed. 1160This makes it effectively the same as 1161.nf 1162 % search 'jeff|Larry|Randal+|Stoc?k|C.*son' 1163.fi 1164However, listing them separately is much more efficient (and is easier 1165to type). 1166.PP 1167Note that in the case of these examples, the 1168.B \-w 1169(list whole-words only) option would be useful. 1170.PP 1171Normally, various kinds of files are automatically removed from consideration. 1172If it has a certain ending (such as ".tar", ".Z", ".o", etc.), or if 1173the beginning of the file looks like a binary, it'll be excluded. 1174You can control exactly how this works -- see below. One quick way to 1175override this is to use the 1176.B \-all 1177option, which means to consider all the files that would normally be 1178automatically excluded. 1179Or, if you're curious, you can use 1180.B \-why 1181to have notes about what files are skipped (and why) printed to stderr. 1182 1183.SH "BASIC OVERVIEW" 1184Normally, the search starts in the current directory, considering files in 1185all subdirectories. 1186 1187You can use the 1188.I ~/.search 1189file to control ways to automatically exclude files. 1190If you don't have this file, a default one will kick in, which automatically 1191adds 1192.nf 1193 -skip .o .Z .gif 1194.fi 1195(among others) to exclude those kinds of files (which you probably want to 1196skip when searching for text, as is normal). 1197Files that look to be binary will also be excluded. 1198 1199Files ending with "#" and "~" will also be excluded unless the 1200.B -x~ 1201option is given. 1202 1203You can use 1204.B -showrc 1205to show what kinds of files will normally be skipped. 1206See the section on the startup file 1207for more info.
1208 1209You can use the 1210.B -all 1211option to indicate you want to consider all files that would otherwise be 1212skipped by the startup file. 1213 1214Based upon various other flags (see "WHICH FILES TO CONSIDER" below), 1215more files might be removed from consideration. For example 1216.nf 1217 -mtime 3 1218.fi 1219will exclude files that aren't at least three days old (change the 3 to -3 1220to exclude files that are more than three days old), while 1221.nf 1222 -skip .* 1223.fi 1224would exclude any file beginning with a dot (of course, '.' and '..' are 1225special and always excluded). 1226 1227If you'd like to see what files are being excluded, and why, you can get the 1228list via the 1229.B \-why 1230option. 1231 1232If a file makes it past all the checks, it is then "considered". 1233This usually means it is grepped for the regular expressions you gave 1234on the command line. 1235 1236If any of the regexes match a line, the line is printed. 1237However, if 1238.B -list 1239is given, just the filename is printed. Or, if 1240.B -nice 1241is given, a somewhat more (human-)readable output is generated. 1242 1243If you're searching a huge tree and want to keep informed about how 1244the search is progressing, 1245.B -v 1246will print (to stderr) the current directory being searched. 1247Using 1248.B -vv 1249will also print the current file "every so often", which could be useful 1250if a directory is huge. Using 1251.B -vvv 1252will print the update with every file. 1253 1254Below is the full listing of options. 1255 1256.SH "OPTIONS TELLING *WHERE* TO SEARCH" 1257.TP 1258.BI -dir " DIR" 1259Start searching at the named directory instead of the current directory. 1260If multiple 1261.B -dir 1262arguments are given, multiple trees will be searched. 1263.TP 1264.BI -ddir " DIR" 1265Like 1266.B -dir 1267except it flushes any previous 1268.B -dir 1269directories (i.e.
"-dir A -dir B -dir C" will search A, B, and C, while 1270"-dir A -ddir B -dir C" will search only B and C). This might be of use 1271in the startup file (see that section below). 1272.TP 1273.B -xdev 1274Stay on the same filesystem as the starting directory/directories. 1275.TP 1276.B -sort 1277Sort the items in a directory before processing them. 1278Normally they are processed in whatever order they happen to be read from 1279the directory. 1280.TP 1281.B -nolinks 1282Don't follow symbolic links. Normally they're followed. 1283 1284.SH "OPTIONS CONTROLLING WHICH FILES TO CONSIDER AND EXCLUDE" 1285.TP 1286.BI -mtime " NUM" 1287Only consider files that were last changed more than 1288.I NUM 1289days ago 1290(less than 1291.I NUM 1292days if 1293.I NUM 1294has '-' prepended, i.e. "-mtime -2.5" means to consider files that 1295have been changed in the last two and a half days). 1296.TP 1297.BI -older " FILE" 1298Only consider files that have not changed since 1299.I FILE 1300was last changed. 1301If there is any upper case in the "-older", "or equal" is added to the sense 1302of the test. Therefore, "search -older ./file regex" will never consider 1303"./file", while "search -Older ./file regex" will. 1304 1305If a file is a symbolic link, the time used is that of the file and not the 1306link. 1307.TP 1308.BI -newer " FILE" 1309Opposite of 1310.BR -older . 1311.TP 1312.BI -name " GLOB" 1313Only consider files that match the shell filename pattern 1314.IR GLOB . 1315The check is only done on a file's name (use 1316.B -path 1317to check the whole path, and use 1318.B -dname 1319to check directory names). 1320 1321Multiple specifications can be given by separating them with spaces, a'la 1322.nf 1323 -name '*.c *.h' 1324.fi 1325to consider C source and header files. 1326If 1327.I GLOB 1328doesn't contain any special pattern characters, a '*' is prepended.
1329This last example could have been given as 1330.nf 1331 -name '.c .h' 1332.fi 1333It could also be given as 1334.nf 1335 -name .c -name .h 1336.fi 1337or 1338.nf 1339 -name '*.c' -name '*.h' 1340.fi 1341or 1342.nf 1343 -name '*.[ch]' 1344.fi 1345(among others) 1346but in this last case, you have to be sure to supply the leading '*'. 1347.TP 1348.BI -path " GLOB" 1349Like 1350.B -name 1351except the entire path is checked against the pattern. 1352.TP 1353.BI -regex " REGEX" 1354Considers files whose names (not paths) match the given perl regex 1355exactly. 1356.TP 1357.BI -iname " GLOB" 1358Case-insensitive version of 1359.BR -name . 1360.TP 1361.BI -ipath " GLOB" 1362Case-insensitive version of 1363.BR -path . 1364.TP 1365.BI -iregex " REGEX" 1366Case-insensitive version of 1367.BR -regex . 1368 1369.TP 1370.BI -dpath " GLOB" 1371Only search down directories whose path matches the given pattern (this 1372doesn't apply to the initial directory given by 1373.BR -dir , 1374of course). 1375Something like 1376.nf 1377 -dir /usr/man -dpath /usr/man/man* 1378.fi 1379would completely skip 1380"/usr/man/cat1", "/usr/man/cat2", etc. 1381.TP 1382.BI -dskip " GLOB" 1383Skips directories whose name (not path) matches the given pattern. 1384Something like 1385.nf 1386 -dir /usr/man -dskip cat* 1387.fi 1388would completely skip any directory in the tree whose name begins with "cat" 1389(including "/usr/man/cat1", "/usr/man/cat2", etc.). 1390.TP 1391.BI -dregex " REGEX" 1392Like 1393.BR -dpath , 1394but the pattern is a full perl regex. Note that this is quite different 1395from 1396.B -regex 1397which considers only file names (not paths). This option considers 1398full directory paths (not just names). It's much more useful this way. 1399Sorry if it's confusing. 1400.TP 1401.BI -dpath " GLOB" 1402This option exists, but is probably not very useful. It probably wants to 1403be like the '-below' or something I mention in the "TODO" section.
1404.TP 1405.BI -idpath " GLOB" 1406Case-insensitive version of 1407.BR -dpath . 1408.TP 1409.BI -idskip " GLOB" 1410Case-insensitive version of 1411.BR -dskip . 1412.TP 1413.BI -idregex " REGEX" 1414Case-insensitive version of 1415.BR -dregex . 1416.TP 1417.B -all 1418Ignore any 'magic' or 'option' lines in the startup file. 1419The effect is that all files that would otherwise be automatically 1420excluded are considered. 1421.TP 1422.BI -x SPECIAL 1423Arguments starting with 1424.B -x 1425(except 1426.BR -xdev , 1427explained elsewhere) do special interaction with the 1428.I ~/.search 1429startup file. Something like 1430.nf 1431 -xflag1 -xflag2 1432.fi 1433will turn on "flag1" and "flag2" in the startup file (and is 1434the same as "-xflag1,flag2"). You can use this to write your own 1435rules for what kinds of files are to be considered. 1436 1437For example, the internal-default startup file contains the line 1438.nf 1439 <!~> option: -skip '~ #' 1440.fi 1441This means that if the 1442.B -x~ 1443flag is 1444.I not 1445seen, the option 1446.nf 1447 -skip '~ #' 1448.fi 1449should be done. 1450The effect is that emacs temp and backup files are not normally 1451considered, but you can include them with the -x~ flag. 1452 1453You can write your own rules to customize 1454.I search 1455in powerful ways. See the STARTUP FILE section below. 1456.TP 1457.B -why 1458Print a message (to stderr) when and why a file is not considered. 1459 1460.SH "OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED" 1461.TP 1462.B -find 1463(you can use 1464.B -f 1465as well). 1466This option changes the basic action of 1467.IR search . 1468 1469Normally, if a file is considered, it is searched 1470for the regular expressions as described earlier. However, if this option 1471is given, the filename is printed and no searching takes place. This turns 1472.I search 1473into a 'find' of sorts.
1474 1475In this case, no regular expressions are needed on the command line 1476(any that are there are silently ignored). 1477 1478This is not intended to be a replacement for the 'find' program, 1479but to aid 1480you in understanding just what files are getting past the exclusion checks. 1481If you really want to use it as a sort of replacement for the 'find' program, 1482you might want to use 1483.B -all 1484so that it doesn't waste time checking to see if the file is binary, etc. 1485(unless you really want that, of course). 1486 1487If you use 1488.BR -find , 1489none of the "GREP-LIKE OPTIONS" (below) matter. 1490 1491As a replacement for 'find', 1492.I search 1493is probably a bit slower (or in the case of GNU find, a lot slower -- 1494GNU find is 1495.I unbelievably 1496fast). 1497However, "search -ffind" 1498might be more useful than 'find' when options such as 1499.B -skip 1500are used (at least until 'find' gets such functionality). 1501.TP 1502.B -ffind 1503(or 1504.BR -ff ) 1505A faster, more 'find'-like find. Does 1506.nf 1507 -find -all -dorep 1508.fi 1509.SH "GREP-LIKE OPTIONS" 1510These options control how a searched file is accessed, 1511and how things are printed. 1512.TP 1513.B -i 1514Ignore letter case when matching. 1515.TP 1516.B -w 1517Consider only whole-word matches ("whole word" as defined by perl's "\\b" 1518regex). 1519.TP 1520.B -u 1521If the regex(es) is/are simple, try to modify them so that they'll work 1522in manpage-like underlined text (i.e. like _^Ht_^Hh_^Hi_^Hs). 1523This is very rudimentary at the moment. 1524.TP 1525.B -list 1526(you can use 1527.B -l 1528too). 1529Don't print matching lines, but the names of files that contain matching 1530lines. This will likely be *much* faster, as special optimizations are 1531made -- particularly with large files. 1532.TP 1533.B -n 1534Prefix each line with its line number. 1535.TP 1536.B -nice 1537Not a grep-like option, but similar to 1538.BR -list , 1539so included here.
1540.B -nice 1541will have the output be a bit more human-readable, with matching lines printed 1542slightly indented after the filename, a'la 1543.nf 1544 1545 % search foo 1546 somedir/somefile: line with foo in it 1547 somedir/somefile: some food for thought 1548 anotherdir/x: don't be a buffoon! 1549 % 1550 1551.fi 1552will become 1553.nf 1554 1555 % search -nice foo 1556 somedir/somefile: 1557 line with foo in it 1558 some food for thought 1559 anotherdir/x: 1560 don't be a buffoon! 1561 % 1562 1563.fi 1564This option is due to Lionel Cons. 1565.TP 1566.B -nnice 1567Be a bit nicer than 1568.BR -nice . 1569Prefix each file's output by a rule line, and follow with an extra blank line. 1570.TP 1571.B -h 1572Don't prepend each output line with the name of the file 1573(meaningless when 1574.B -find 1575or 1576.B -l 1577are given). 1578 1579.SH "OTHER OPTIONS" 1580.TP 1581.B -help 1582Print the usage information. 1583.TP 1584.B -version 1585Print the version information and quit. 1586.TP 1587.B -v 1588Set the level of message verbosity. 1589.B -v 1590will print a note whenever a new directory is entered. 1591.B -vv 1592will also print a note "every so often". This can be useful to see 1593what's happening when searching huge directories. 1594.B -vvv 1595will print a note with every file. 1596.B -vvvv 1597is 1598.B -vvv 1599plus 1600.BR -why . 1601.TP 1602.B -e 1603This ends the options, and can be useful if the regex begins with '-'. 1604.TP 1605.B -showrc 1606Shows what is being considered in the startup file, then exits. 1607.TP 1608.B -dorep 1609Normally, an identical file won't be checked twice (even with multiple 1610hard or symbolic links). If you're just trying to do a fast 1611.BR -find , 1612the bookkeeping to remember which files have been seen is not desirable, 1613so you can eliminate the bookkeeping with this flag. 1614 1615.SH "STARTUP FILE" 1616When 1617.I search 1618starts up, it processes the directives in 1619.IR ~/.search .
1620If no such file exists, a default 1621internal version is used. 1622 1623The internal version looks like: 1624.nf 1625 1626 magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/ 1627 option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi' 1628 option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu' 1629 <!~> option: -skip '~ #' 1630 1631.fi 1632If you wish to create your own "~/.search", 1633you might consider copying the above, and then working from there. 1634 1635There are two kinds of directives in a startup file: "magic" and "option". 1636.RS 0n 1637.TP 1638OPTION 1639Option lines will automatically do the command-line options given. 1640For example, the line 1641.nf 1642 option: -v 1643.fi 1644in your startup file will turn on -v every time, without needing to type it 1645on the command line. 1646 1647The text on the line after the "option:" directive is processed 1648like the Bourne shell, so make sure to pay attention to quoting. 1649.nf 1650 option: -skip .exe .com 1651.fi 1652will give an error (".com" by itself isn't a valid option), while 1653.nf 1654 option: -skip ".exe .com" 1655.fi 1656will properly include it as part of -skip's argument. 1657 1658.TP 1659MAGIC 1660Magic lines are used to determine if a file should be considered a binary 1661or not (the term "magic" refers to checking a file's magic number). These 1662are described in more detail below. 1663.RE 1664 1665Blank lines and comments (lines beginning with '#') are allowed. 1666 1667If a line begins with <...>, then it's a check to see if the 1668directive on the line should be done or not. The stuff inside the <...> 1669can contain perl's && (and), || (or), ! (not), and parens for grouping, 1670along with "flags" that might be indicated by the user with 1671.BI -x flag 1672options. 1673 1674For example, using "-xfoo" will cause "foo" to be true inside the <...> 1675blocks.
Therefore, a line beginning with "<foo>" would be done only when 1676"-xfoo" had been specified, while a line beginning with "<!foo>" would be 1677done only when "-xfoo" is not specified (of course, a line without any <...> 1678is done in either case). 1679 1680A realistic example might be 1681.nf 1682 <!v> option: -vv 1683.fi 1684This will cause -vv messages to be the default, but allow "-xv" to override. 1685 1686There are a few flags that are set automatically: 1687.RS 1688.TP 1689.B TTY 1690True if the output is to the screen (as opposed to being redirected to a file). 1691You can force this (as with all the other automatic flags) with -xTTY. 1692.TP 1693.B -v 1694True if -v was specified. If -vv was specified, both 1695.B -v 1696and 1697.B -vv 1698flags are true (and so on). 1699.TP 1700.B -nice 1701True if -nice was specified. Same thing about -nnice as for -vv. 1702.PP 1703.TP 1704.B -list 1705True if -list (or -l) was given. 1706.TP 1707.B -dir 1708True if -dir was given. 1709.RE 1710 1711Using this info, you might change the last example to 1712.nf 1713 1714 <!v && !-v> option: -vv 1715 1716.fi 1717The added "&& !-v" means "and if the '-v' option is not given". 1718This will allow you to use "-v" alone on the command line, and not 1719have this directive add the more verbose "-vv" automatically. 1720 1721.RS 0 1722Some other examples: 1723.TP 1724<!-dir && !here> option: -dir ~/ 1725Effectively make the default directory your home directory (instead of the 1726current directory). Using -dir or -xhere will undo this. 1727.TP 1728<tex> option: -name .tex -dir ~/pub 1729Create '-xtex' to search only "*.tex" files in your ~/pub directory tree. 1730Actually, this could be made a bit better. If you combine '-xtex' and '-dir' 1731on the command line, this directive will add ~/pub to the list, when you 1732probably want to use the -dir directory only.
You could do 1733.nf 1734 1735 <tex> option: -name .tex 1736 <tex && !-dir> option: -dir ~/pub 1737.fi 1738 1739to allow '-xtex' to work as before, but allow a command-line "-dir" 1740to take precedence with respect to ~/pub. 1741.TP 1742<fluff> option: -nnice -sort -i -vvv 1743Combine a few user-friendly options into one '-xfluff' option. 1744.TP 1745<man> option: -ddir /usr/man -v -w 1746When the '-xman' option is given, search "/usr/man" for whole-words 1747(of whatever regex or regexes are given on the command line), with -v. 1748.RE 1749 1750The lines in the startup file are executed from top to bottom, so something 1751like 1752.nf 1753 1754 <both> option: -xflag1 -xflag2 1755 <flag1> option: ...whatever... 1756 <flag2> option: ...whatever... 1757 1758.fi 1759will allow '-xboth' to be the same as '-xflag1 -xflag2' (or '-xflag1,flag2' 1760for that matter). However, if you put the "<both>" line below the others, 1761they will not be true when encountered, so the result would be different 1762(and probably undesired). 1763 1764The "magic" directives are used to determine if a file looks to be binary 1765or not. The form of a magic line is 1766.nf 1767 magic: \fISIZE\fP : \fIPERLCODE\fP 1768.fi 1769where 1770.I SIZE 1771is the number of bytes of the file you need to check, and 1772.I PERLCODE 1773is the code to do the check. Within 1774.IR PERLCODE , 1775the variable $H will hold at least the first 1776.I SIZE 1777bytes of the file (unless the file is shorter than that, of course). 1778It might hold more bytes. The perl should evaluate to true if the file 1779should be considered a binary. 1780 1781An example might be 1782.nf 1783 magic: 6 : substr($H, 0, 6) eq 'GIF87a' 1784.fi 1785to test for a GIF ("-iskip .gif" is better, but this might be useful 1786if you have images in files without the ".gif" extension).
1787 1788Since the startup file is checked from top to bottom, you can be a bit 1789more efficient: 1790.nf 1791 magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a' 1792 magic: 6 : $x6 eq 'GIF89a' 1793.fi 1794You could also write the same thing as 1795.nf 1796 magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a') || ## an old gif, or.. \e 1797 $x6 eq 'GIF89a' ## .. a new one. 1798.fi 1799since newlines may be escaped. 1800 1801The default internal startup file includes 1802.nf 1803 magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/ 1804.fi 1805which checks for certain non-printable characters, and catches a large 1806number of binary files, including most systems' executables, linkable 1807objects, compressed, tarred, and otherwise folded, spindled, and mutilated 1808files. 1809 1810Another example might be 1811.nf 1812 ## an archive library 1813 magic: 17 : substr($H, 0, 17) eq "!<arch>\en__.SYMDEF" 1814.fi 1815 1816.SH "RETURN VALUE" 1817.I Search 1818returns zero if lines (or files, if appropriate) were found, 1819or if no work was requested (such as with 1820.BR -help ). 1821Returns 1 if no lines (or files) were found. 1822Returns 2 on error. 1823 1824.SH TODO 1825Things I'd like to add some day: 1826.nf 1827 + show surrounding lines (context). 1828 + highlight matched portions of lines. 1829 + add '-and', which can go between regexes to override 1830 the default logical or of the regexes. 1831 + add something like 1832 -below GLOB 1833 which will examine a tree and only consider files that 1834 lie in a directory deeper than one named by the pattern. 1835 + add 'warning' and 'error' directives. 1836 + add 'help' directive. 1837.fi 1838.SH BUGS 1839If -xdev and multiple -dir arguments are given, any file in any of the 1840target filesystems is allowed. It would be better to allow each filesystem 1841for each separate tree. 1842 1843Multiple -dir args might also cause some confusing effects.
Doing 1844.nf 1845 -dir some/dir -dir other 1846.fi 1847will search "some/dir" completely, then search "other" completely. This 1848is good. However, something like 1849.nf 1850 -dir some/dir -dir some/dir/more/specific 1851.fi 1852will search "some/dir" completely *except for* "some/dir/more/specific", 1853after which it will return and be searched. Not really a bug, but just sort 1854of odd. 1855 1856File times (for -newer, etc.) of symbolic links are for the file, not the 1857link. This could cause some misunderstandings. 1858 1859Probably more. Please let me know. 1860.SH AUTHOR 1861Jeffrey Friedl, Omron Corp (jfriedl@omron.co.jp) 1862.br 1863http://www.wg.omron.co.jp/cgi-bin/j-e/jfriedl.html 1864 1865.SH "LATEST SOURCE" 1866See http://www.wg.omron.co.jp/~jfriedl/perl/index.html 1867