1 use std::cmp;
2 use std::ffi::OsStr;
3 use std::fmt;
4 use std::fs::{self, FileType, Metadata};
5 use std::io;
6 use std::path::{Path, PathBuf};
7 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
8 use std::sync::{Arc, Mutex};
9 use std::thread;
10 use std::time::Duration;
11 use std::vec;
12
13 use same_file::Handle;
14 use walkdir::{self, WalkDir};
15
16 use dir::{Ignore, IgnoreBuilder};
17 use gitignore::GitignoreBuilder;
18 use overrides::Override;
19 use types::Types;
20 use {Error, PartialErrorBuilder};
21
22 /// A directory entry with a possible error attached.
23 ///
24 /// The error typically refers to a problem parsing ignore files in a
25 /// particular directory.
26 #[derive(Clone, Debug)]
27 pub struct DirEntry {
28 dent: DirEntryInner,
29 err: Option<Error>,
30 }
31
32 impl DirEntry {
33 /// The full path that this entry represents.
path(&self) -> &Path34 pub fn path(&self) -> &Path {
35 self.dent.path()
36 }
37
38 /// The full path that this entry represents.
39 /// Analogous to [`path`], but moves ownership of the path.
40 ///
41 /// [`path`]: struct.DirEntry.html#method.path
into_path(self) -> PathBuf42 pub fn into_path(self) -> PathBuf {
43 self.dent.into_path()
44 }
45
46 /// Whether this entry corresponds to a symbolic link or not.
path_is_symlink(&self) -> bool47 pub fn path_is_symlink(&self) -> bool {
48 self.dent.path_is_symlink()
49 }
50
51 /// Returns true if and only if this entry corresponds to stdin.
52 ///
53 /// i.e., The entry has depth 0 and its file name is `-`.
is_stdin(&self) -> bool54 pub fn is_stdin(&self) -> bool {
55 self.dent.is_stdin()
56 }
57
58 /// Return the metadata for the file that this entry points to.
metadata(&self) -> Result<Metadata, Error>59 pub fn metadata(&self) -> Result<Metadata, Error> {
60 self.dent.metadata()
61 }
62
63 /// Return the file type for the file that this entry points to.
64 ///
65 /// This entry doesn't have a file type if it corresponds to stdin.
file_type(&self) -> Option<FileType>66 pub fn file_type(&self) -> Option<FileType> {
67 self.dent.file_type()
68 }
69
70 /// Return the file name of this entry.
71 ///
72 /// If this entry has no file name (e.g., `/`), then the full path is
73 /// returned.
file_name(&self) -> &OsStr74 pub fn file_name(&self) -> &OsStr {
75 self.dent.file_name()
76 }
77
78 /// Returns the depth at which this entry was created relative to the root.
depth(&self) -> usize79 pub fn depth(&self) -> usize {
80 self.dent.depth()
81 }
82
83 /// Returns the underlying inode number if one exists.
84 ///
85 /// If this entry doesn't have an inode number, then `None` is returned.
86 #[cfg(unix)]
ino(&self) -> Option<u64>87 pub fn ino(&self) -> Option<u64> {
88 self.dent.ino()
89 }
90
91 /// Returns an error, if one exists, associated with processing this entry.
92 ///
93 /// An example of an error is one that occurred while parsing an ignore
94 /// file. Errors related to traversing a directory tree itself are reported
95 /// as part of yielding the directory entry, and not with this method.
error(&self) -> Option<&Error>96 pub fn error(&self) -> Option<&Error> {
97 self.err.as_ref()
98 }
99
100 /// Returns true if and only if this entry points to a directory.
is_dir(&self) -> bool101 pub(crate) fn is_dir(&self) -> bool {
102 self.dent.is_dir()
103 }
104
new_stdin() -> DirEntry105 fn new_stdin() -> DirEntry {
106 DirEntry { dent: DirEntryInner::Stdin, err: None }
107 }
108
new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry109 fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
110 DirEntry { dent: DirEntryInner::Walkdir(dent), err: err }
111 }
112
new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry113 fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
114 DirEntry { dent: DirEntryInner::Raw(dent), err: err }
115 }
116 }
117
118 /// DirEntryInner is the implementation of DirEntry.
119 ///
120 /// It specifically represents three distinct sources of directory entries:
121 ///
122 /// 1. From the walkdir crate.
123 /// 2. Special entries that represent things like stdin.
124 /// 3. From a path.
125 ///
126 /// Specifically, (3) has to essentially re-create the DirEntry implementation
127 /// from WalkDir.
128 #[derive(Clone, Debug)]
129 enum DirEntryInner {
130 Stdin,
131 Walkdir(walkdir::DirEntry),
132 Raw(DirEntryRaw),
133 }
134
135 impl DirEntryInner {
path(&self) -> &Path136 fn path(&self) -> &Path {
137 use self::DirEntryInner::*;
138 match *self {
139 Stdin => Path::new("<stdin>"),
140 Walkdir(ref x) => x.path(),
141 Raw(ref x) => x.path(),
142 }
143 }
144
into_path(self) -> PathBuf145 fn into_path(self) -> PathBuf {
146 use self::DirEntryInner::*;
147 match self {
148 Stdin => PathBuf::from("<stdin>"),
149 Walkdir(x) => x.into_path(),
150 Raw(x) => x.into_path(),
151 }
152 }
153
path_is_symlink(&self) -> bool154 fn path_is_symlink(&self) -> bool {
155 use self::DirEntryInner::*;
156 match *self {
157 Stdin => false,
158 Walkdir(ref x) => x.path_is_symlink(),
159 Raw(ref x) => x.path_is_symlink(),
160 }
161 }
162
is_stdin(&self) -> bool163 fn is_stdin(&self) -> bool {
164 match *self {
165 DirEntryInner::Stdin => true,
166 _ => false,
167 }
168 }
169
metadata(&self) -> Result<Metadata, Error>170 fn metadata(&self) -> Result<Metadata, Error> {
171 use self::DirEntryInner::*;
172 match *self {
173 Stdin => {
174 let err = Error::Io(io::Error::new(
175 io::ErrorKind::Other,
176 "<stdin> has no metadata",
177 ));
178 Err(err.with_path("<stdin>"))
179 }
180 Walkdir(ref x) => x.metadata().map_err(|err| {
181 Error::Io(io::Error::from(err)).with_path(x.path())
182 }),
183 Raw(ref x) => x.metadata(),
184 }
185 }
186
file_type(&self) -> Option<FileType>187 fn file_type(&self) -> Option<FileType> {
188 use self::DirEntryInner::*;
189 match *self {
190 Stdin => None,
191 Walkdir(ref x) => Some(x.file_type()),
192 Raw(ref x) => Some(x.file_type()),
193 }
194 }
195
file_name(&self) -> &OsStr196 fn file_name(&self) -> &OsStr {
197 use self::DirEntryInner::*;
198 match *self {
199 Stdin => OsStr::new("<stdin>"),
200 Walkdir(ref x) => x.file_name(),
201 Raw(ref x) => x.file_name(),
202 }
203 }
204
depth(&self) -> usize205 fn depth(&self) -> usize {
206 use self::DirEntryInner::*;
207 match *self {
208 Stdin => 0,
209 Walkdir(ref x) => x.depth(),
210 Raw(ref x) => x.depth(),
211 }
212 }
213
214 #[cfg(unix)]
ino(&self) -> Option<u64>215 fn ino(&self) -> Option<u64> {
216 use self::DirEntryInner::*;
217 use walkdir::DirEntryExt;
218 match *self {
219 Stdin => None,
220 Walkdir(ref x) => Some(x.ino()),
221 Raw(ref x) => Some(x.ino()),
222 }
223 }
224
225 /// Returns true if and only if this entry points to a directory.
is_dir(&self) -> bool226 fn is_dir(&self) -> bool {
227 self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
228 }
229 }
230
/// DirEntryRaw is essentially copied from the walkdir crate so that we can
/// build `DirEntry`s from whole cloth in the parallel iterator.
#[derive(Clone)]
struct DirEntryRaw {
    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
    /// symbolic link).
    path: PathBuf,
    /// The file type. Necessary for recursive iteration, so store it.
    ty: FileType,
    /// Is set when this entry was created from a symbolic link and the user
    /// expects the iterator to follow symbolic links.
    follow_link: bool,
    /// The depth at which this entry was generated relative to the root.
    depth: usize,
    /// The underlying inode number (Unix only).
    #[cfg(unix)]
    ino: u64,
    /// The underlying metadata (Windows only). We store this on Windows
    /// because this comes for free while reading a directory.
    #[cfg(windows)]
    metadata: fs::Metadata,
}

impl fmt::Debug for DirEntryRaw {
    /// Formats the platform-independent fields of the entry.
    ///
    /// The file type is included: `FileType` implements `Debug` on every
    /// toolchain able to compile this file. The platform-specific fields
    /// (`ino` on Unix, `metadata` on Windows) are left out so the field set
    /// is the same on every platform.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DirEntryRaw")
            .field("path", &self.path)
            .field("ty", &self.ty)
            .field("follow_link", &self.follow_link)
            .field("depth", &self.depth)
            .finish()
    }
}
266
267 impl DirEntryRaw {
path(&self) -> &Path268 fn path(&self) -> &Path {
269 &self.path
270 }
271
into_path(self) -> PathBuf272 fn into_path(self) -> PathBuf {
273 self.path
274 }
275
path_is_symlink(&self) -> bool276 fn path_is_symlink(&self) -> bool {
277 self.ty.is_symlink() || self.follow_link
278 }
279
metadata(&self) -> Result<Metadata, Error>280 fn metadata(&self) -> Result<Metadata, Error> {
281 self.metadata_internal()
282 }
283
284 #[cfg(windows)]
metadata_internal(&self) -> Result<fs::Metadata, Error>285 fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
286 if self.follow_link {
287 fs::metadata(&self.path)
288 } else {
289 Ok(self.metadata.clone())
290 }
291 .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
292 }
293
294 #[cfg(not(windows))]
metadata_internal(&self) -> Result<fs::Metadata, Error>295 fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
296 if self.follow_link {
297 fs::metadata(&self.path)
298 } else {
299 fs::symlink_metadata(&self.path)
300 }
301 .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
302 }
303
file_type(&self) -> FileType304 fn file_type(&self) -> FileType {
305 self.ty
306 }
307
file_name(&self) -> &OsStr308 fn file_name(&self) -> &OsStr {
309 self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
310 }
311
depth(&self) -> usize312 fn depth(&self) -> usize {
313 self.depth
314 }
315
316 #[cfg(unix)]
ino(&self) -> u64317 fn ino(&self) -> u64 {
318 self.ino
319 }
320
from_entry( depth: usize, ent: &fs::DirEntry, ) -> Result<DirEntryRaw, Error>321 fn from_entry(
322 depth: usize,
323 ent: &fs::DirEntry,
324 ) -> Result<DirEntryRaw, Error> {
325 let ty = ent.file_type().map_err(|err| {
326 let err = Error::Io(io::Error::from(err)).with_path(ent.path());
327 Error::WithDepth { depth: depth, err: Box::new(err) }
328 })?;
329 DirEntryRaw::from_entry_os(depth, ent, ty)
330 }
331
332 #[cfg(windows)]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>333 fn from_entry_os(
334 depth: usize,
335 ent: &fs::DirEntry,
336 ty: fs::FileType,
337 ) -> Result<DirEntryRaw, Error> {
338 let md = ent.metadata().map_err(|err| {
339 let err = Error::Io(io::Error::from(err)).with_path(ent.path());
340 Error::WithDepth { depth: depth, err: Box::new(err) }
341 })?;
342 Ok(DirEntryRaw {
343 path: ent.path(),
344 ty: ty,
345 follow_link: false,
346 depth: depth,
347 metadata: md,
348 })
349 }
350
351 #[cfg(unix)]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>352 fn from_entry_os(
353 depth: usize,
354 ent: &fs::DirEntry,
355 ty: fs::FileType,
356 ) -> Result<DirEntryRaw, Error> {
357 use std::os::unix::fs::DirEntryExt;
358
359 Ok(DirEntryRaw {
360 path: ent.path(),
361 ty: ty,
362 follow_link: false,
363 depth: depth,
364 ino: ent.ino(),
365 })
366 }
367
368 // Placeholder implementation to allow compiling on non-standard platforms
369 // (e.g. wasm32).
370 #[cfg(not(any(windows, unix)))]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>371 fn from_entry_os(
372 depth: usize,
373 ent: &fs::DirEntry,
374 ty: fs::FileType,
375 ) -> Result<DirEntryRaw, Error> {
376 Err(Error::Io(io::Error::new(
377 io::ErrorKind::Other,
378 "unsupported platform",
379 )))
380 }
381
382 #[cfg(windows)]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>383 fn from_path(
384 depth: usize,
385 pb: PathBuf,
386 link: bool,
387 ) -> Result<DirEntryRaw, Error> {
388 let md =
389 fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
390 Ok(DirEntryRaw {
391 path: pb,
392 ty: md.file_type(),
393 follow_link: link,
394 depth: depth,
395 metadata: md,
396 })
397 }
398
399 #[cfg(unix)]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>400 fn from_path(
401 depth: usize,
402 pb: PathBuf,
403 link: bool,
404 ) -> Result<DirEntryRaw, Error> {
405 use std::os::unix::fs::MetadataExt;
406
407 let md =
408 fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
409 Ok(DirEntryRaw {
410 path: pb,
411 ty: md.file_type(),
412 follow_link: link,
413 depth: depth,
414 ino: md.ino(),
415 })
416 }
417
418 // Placeholder implementation to allow compiling on non-standard platforms
419 // (e.g. wasm32).
420 #[cfg(not(any(windows, unix)))]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>421 fn from_path(
422 depth: usize,
423 pb: PathBuf,
424 link: bool,
425 ) -> Result<DirEntryRaw, Error> {
426 Err(Error::Io(io::Error::new(
427 io::ErrorKind::Other,
428 "unsupported platform",
429 )))
430 }
431 }
432
433 /// WalkBuilder builds a recursive directory iterator.
434 ///
435 /// The builder supports a large number of configurable options. This includes
436 /// specific glob overrides, file type matching, toggling whether hidden
437 /// files are ignored or not, and of course, support for respecting gitignore
438 /// files.
439 ///
440 /// By default, all ignore files found are respected. This includes `.ignore`,
441 /// `.gitignore`, `.git/info/exclude` and even your global gitignore
442 /// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
443 ///
444 /// Some standard recursive directory options are also supported, such as
445 /// limiting the recursive depth or whether to follow symbolic links (disabled
446 /// by default).
447 ///
448 /// # Ignore rules
449 ///
450 /// There are many rules that influence whether a particular file or directory
451 /// is skipped by this iterator. Those rules are documented here. Note that
452 /// the rules assume a default configuration.
453 ///
454 /// * First, glob overrides are checked. If a path matches a glob override,
455 /// then matching stops. The path is then only skipped if the glob that matched
456 /// the path is an ignore glob. (An override glob is a whitelist glob unless it
457 /// starts with a `!`, in which case it is an ignore glob.)
458 /// * Second, ignore files are checked. Ignore files currently only come from
459 /// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
460 /// global gitignore file), plain `.ignore` files, which have the same format
461 /// as gitignore files, or explicitly added ignore files. The precedence order
462 /// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
463 /// finally explicitly added ignore files. Note that precedence between
464 /// different types of ignore files is not impacted by the directory hierarchy;
465 /// any `.ignore` file overrides all `.gitignore` files. Within each precedence
466 /// level, more nested ignore files have a higher precedence than less nested
467 /// ignore files.
468 /// * Third, if the previous step yields an ignore match, then all matching
469 /// is stopped and the path is skipped. If it yields a whitelist match, then
470 /// matching continues. A whitelist match can be overridden by a later matcher.
471 /// * Fourth, unless the path is a directory, the file type matcher is run on
472 /// the path. As above, if it yields an ignore match, then all matching is
473 /// stopped and the path is skipped. If it yields a whitelist match, then
474 /// matching continues.
475 /// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
476 /// path is skipped.
477 /// * Sixth, unless the path is a directory, the size of the file is compared
478 /// against the max filesize limit. If it exceeds the limit, it is skipped.
479 /// * Seventh, if the path has made it this far then it is yielded in the
480 /// iterator.
481 #[derive(Clone)]
482 pub struct WalkBuilder {
483 paths: Vec<PathBuf>,
484 ig_builder: IgnoreBuilder,
485 max_depth: Option<usize>,
486 max_filesize: Option<u64>,
487 follow_links: bool,
488 same_file_system: bool,
489 sorter: Option<Sorter>,
490 threads: usize,
491 skip: Option<Arc<Handle>>,
492 filter: Option<Filter>,
493 }
494
495 #[derive(Clone)]
496 enum Sorter {
497 ByName(
498 Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>,
499 ),
500 ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
501 }
502
503 #[derive(Clone)]
504 struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
505
506 impl fmt::Debug for WalkBuilder {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result507 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
508 f.debug_struct("WalkBuilder")
509 .field("paths", &self.paths)
510 .field("ig_builder", &self.ig_builder)
511 .field("max_depth", &self.max_depth)
512 .field("max_filesize", &self.max_filesize)
513 .field("follow_links", &self.follow_links)
514 .field("threads", &self.threads)
515 .field("skip", &self.skip)
516 .finish()
517 }
518 }
519
520 impl WalkBuilder {
521 /// Create a new builder for a recursive directory iterator for the
522 /// directory given.
523 ///
524 /// Note that if you want to traverse multiple different directories, it
525 /// is better to call `add` on this builder than to create multiple
526 /// `Walk` values.
new<P: AsRef<Path>>(path: P) -> WalkBuilder527 pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
528 WalkBuilder {
529 paths: vec![path.as_ref().to_path_buf()],
530 ig_builder: IgnoreBuilder::new(),
531 max_depth: None,
532 max_filesize: None,
533 follow_links: false,
534 same_file_system: false,
535 sorter: None,
536 threads: 0,
537 skip: None,
538 filter: None,
539 }
540 }
541
542 /// Build a new `Walk` iterator.
build(&self) -> Walk543 pub fn build(&self) -> Walk {
544 let follow_links = self.follow_links;
545 let max_depth = self.max_depth;
546 let sorter = self.sorter.clone();
547 let its = self
548 .paths
549 .iter()
550 .map(move |p| {
551 if p == Path::new("-") {
552 (p.to_path_buf(), None)
553 } else {
554 let mut wd = WalkDir::new(p);
555 wd = wd.follow_links(follow_links || p.is_file());
556 wd = wd.same_file_system(self.same_file_system);
557 if let Some(max_depth) = max_depth {
558 wd = wd.max_depth(max_depth);
559 }
560 if let Some(ref sorter) = sorter {
561 match sorter.clone() {
562 Sorter::ByName(cmp) => {
563 wd = wd.sort_by(move |a, b| {
564 cmp(a.file_name(), b.file_name())
565 });
566 }
567 Sorter::ByPath(cmp) => {
568 wd = wd.sort_by(move |a, b| {
569 cmp(a.path(), b.path())
570 });
571 }
572 }
573 }
574 (p.to_path_buf(), Some(WalkEventIter::from(wd)))
575 }
576 })
577 .collect::<Vec<_>>()
578 .into_iter();
579 let ig_root = self.ig_builder.build();
580 Walk {
581 its: its,
582 it: None,
583 ig_root: ig_root.clone(),
584 ig: ig_root.clone(),
585 max_filesize: self.max_filesize,
586 skip: self.skip.clone(),
587 filter: self.filter.clone(),
588 }
589 }
590
591 /// Build a new `WalkParallel` iterator.
592 ///
593 /// Note that this *doesn't* return something that implements `Iterator`.
594 /// Instead, the returned value must be run with a closure. e.g.,
595 /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
build_parallel(&self) -> WalkParallel596 pub fn build_parallel(&self) -> WalkParallel {
597 WalkParallel {
598 paths: self.paths.clone().into_iter(),
599 ig_root: self.ig_builder.build(),
600 max_depth: self.max_depth,
601 max_filesize: self.max_filesize,
602 follow_links: self.follow_links,
603 same_file_system: self.same_file_system,
604 threads: self.threads,
605 skip: self.skip.clone(),
606 filter: self.filter.clone(),
607 }
608 }
609
610 /// Add a file path to the iterator.
611 ///
612 /// Each additional file path added is traversed recursively. This should
613 /// be preferred over building multiple `Walk` iterators since this
614 /// enables reusing resources across iteration.
add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder615 pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
616 self.paths.push(path.as_ref().to_path_buf());
617 self
618 }
619
620 /// The maximum depth to recurse.
621 ///
622 /// The default, `None`, imposes no depth restriction.
max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder623 pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
624 self.max_depth = depth;
625 self
626 }
627
628 /// Whether to follow symbolic links or not.
follow_links(&mut self, yes: bool) -> &mut WalkBuilder629 pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
630 self.follow_links = yes;
631 self
632 }
633
634 /// Whether to ignore files above the specified limit.
max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder635 pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
636 self.max_filesize = filesize;
637 self
638 }
639
640 /// The number of threads to use for traversal.
641 ///
642 /// Note that this only has an effect when using `build_parallel`.
643 ///
644 /// The default setting is `0`, which chooses the number of threads
645 /// automatically using heuristics.
threads(&mut self, n: usize) -> &mut WalkBuilder646 pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
647 self.threads = n;
648 self
649 }
650
651 /// Add a global ignore file to the matcher.
652 ///
653 /// This has lower precedence than all other sources of ignore rules.
654 ///
655 /// If there was a problem adding the ignore file, then an error is
656 /// returned. Note that the error may indicate *partial* failure. For
657 /// example, if an ignore file contains an invalid glob, all other globs
658 /// are still applied.
add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error>659 pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
660 let mut builder = GitignoreBuilder::new("");
661 let mut errs = PartialErrorBuilder::default();
662 errs.maybe_push(builder.add(path));
663 match builder.build() {
664 Ok(gi) => {
665 self.ig_builder.add_ignore(gi);
666 }
667 Err(err) => {
668 errs.push(err);
669 }
670 }
671 errs.into_error_option()
672 }
673
674 /// Add a custom ignore file name
675 ///
676 /// These ignore files have higher precedence than all other ignore files.
677 ///
678 /// When specifying multiple names, earlier names have lower precedence than
679 /// later names.
add_custom_ignore_filename<S: AsRef<OsStr>>( &mut self, file_name: S, ) -> &mut WalkBuilder680 pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
681 &mut self,
682 file_name: S,
683 ) -> &mut WalkBuilder {
684 self.ig_builder.add_custom_ignore_filename(file_name);
685 self
686 }
687
688 /// Add an override matcher.
689 ///
690 /// By default, no override matcher is used.
691 ///
692 /// This overrides any previous setting.
overrides(&mut self, overrides: Override) -> &mut WalkBuilder693 pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
694 self.ig_builder.overrides(overrides);
695 self
696 }
697
698 /// Add a file type matcher.
699 ///
700 /// By default, no file type matcher is used.
701 ///
702 /// This overrides any previous setting.
types(&mut self, types: Types) -> &mut WalkBuilder703 pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
704 self.ig_builder.types(types);
705 self
706 }
707
708 /// Enables all the standard ignore filters.
709 ///
710 /// This toggles, as a group, all the filters that are enabled by default:
711 ///
712 /// - [hidden()](#method.hidden)
713 /// - [parents()](#method.parents)
714 /// - [ignore()](#method.ignore)
715 /// - [git_ignore()](#method.git_ignore)
716 /// - [git_global()](#method.git_global)
717 /// - [git_exclude()](#method.git_exclude)
718 ///
719 /// They may still be toggled individually after calling this function.
720 ///
721 /// This is (by definition) enabled by default.
standard_filters(&mut self, yes: bool) -> &mut WalkBuilder722 pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
723 self.hidden(yes)
724 .parents(yes)
725 .ignore(yes)
726 .git_ignore(yes)
727 .git_global(yes)
728 .git_exclude(yes)
729 }
730
731 /// Enables ignoring hidden files.
732 ///
733 /// This is enabled by default.
hidden(&mut self, yes: bool) -> &mut WalkBuilder734 pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
735 self.ig_builder.hidden(yes);
736 self
737 }
738
739 /// Enables reading ignore files from parent directories.
740 ///
741 /// If this is enabled, then .gitignore files in parent directories of each
742 /// file path given are respected. Otherwise, they are ignored.
743 ///
744 /// This is enabled by default.
parents(&mut self, yes: bool) -> &mut WalkBuilder745 pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
746 self.ig_builder.parents(yes);
747 self
748 }
749
750 /// Enables reading `.ignore` files.
751 ///
752 /// `.ignore` files have the same semantics as `gitignore` files and are
753 /// supported by search tools such as ripgrep and The Silver Searcher.
754 ///
755 /// This is enabled by default.
ignore(&mut self, yes: bool) -> &mut WalkBuilder756 pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
757 self.ig_builder.ignore(yes);
758 self
759 }
760
761 /// Enables reading a global gitignore file, whose path is specified in
762 /// git's `core.excludesFile` config option.
763 ///
764 /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
765 /// does not exist or does not specify `core.excludesFile`, then
766 /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
767 /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
768 ///
769 /// This is enabled by default.
git_global(&mut self, yes: bool) -> &mut WalkBuilder770 pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
771 self.ig_builder.git_global(yes);
772 self
773 }
774
775 /// Enables reading `.gitignore` files.
776 ///
777 /// `.gitignore` files have match semantics as described in the `gitignore`
778 /// man page.
779 ///
780 /// This is enabled by default.
git_ignore(&mut self, yes: bool) -> &mut WalkBuilder781 pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
782 self.ig_builder.git_ignore(yes);
783 self
784 }
785
786 /// Enables reading `.git/info/exclude` files.
787 ///
788 /// `.git/info/exclude` files have match semantics as described in the
789 /// `gitignore` man page.
790 ///
791 /// This is enabled by default.
git_exclude(&mut self, yes: bool) -> &mut WalkBuilder792 pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
793 self.ig_builder.git_exclude(yes);
794 self
795 }
796
797 /// Whether a git repository is required to apply git-related ignore
798 /// rules (global rules, .gitignore and local exclude rules).
799 ///
800 /// When disabled, git-related ignore rules are applied even when searching
801 /// outside a git repository.
require_git(&mut self, yes: bool) -> &mut WalkBuilder802 pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
803 self.ig_builder.require_git(yes);
804 self
805 }
806
807 /// Process ignore files case insensitively
808 ///
809 /// This is disabled by default.
ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder810 pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
811 self.ig_builder.ignore_case_insensitive(yes);
812 self
813 }
814
815 /// Set a function for sorting directory entries by their path.
816 ///
817 /// If a compare function is set, the resulting iterator will return all
818 /// paths in sorted order. The compare function will be called to compare
819 /// entries from the same directory.
820 ///
821 /// This is like `sort_by_file_name`, except the comparator accepts
822 /// a `&Path` instead of the base file name, which permits it to sort by
823 /// more criteria.
824 ///
825 /// This method will override any previous sorter set by this method or
826 /// by `sort_by_file_name`.
827 ///
828 /// Note that this is not used in the parallel iterator.
sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,829 pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
830 where
831 F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
832 {
833 self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
834 self
835 }
836
837 /// Set a function for sorting directory entries by file name.
838 ///
839 /// If a compare function is set, the resulting iterator will return all
840 /// paths in sorted order. The compare function will be called to compare
841 /// names from entries from the same directory using only the name of the
842 /// entry.
843 ///
844 /// This method will override any previous sorter set by this method or
845 /// by `sort_by_file_path`.
846 ///
847 /// Note that this is not used in the parallel iterator.
sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,848 pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
849 where
850 F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
851 {
852 self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
853 self
854 }
855
856 /// Do not cross file system boundaries.
857 ///
858 /// When this option is enabled, directory traversal will not descend into
859 /// directories that are on a different file system from the root path.
860 ///
861 /// Currently, this option is only supported on Unix and Windows. If this
862 /// option is used on an unsupported platform, then directory traversal
863 /// will immediately return an error and will not yield any entries.
same_file_system(&mut self, yes: bool) -> &mut WalkBuilder864 pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
865 self.same_file_system = yes;
866 self
867 }
868
869 /// Do not yield directory entries that are believed to correspond to
870 /// stdout.
871 ///
872 /// This is useful when a command is invoked via shell redirection to a
873 /// file that is also being read. For example, `grep -r foo ./ > results`
874 /// might end up trying to search `results` even though it is also writing
875 /// to it, which could cause an unbounded feedback loop. Setting this
876 /// option prevents this from happening by skipping over the `results`
877 /// file.
878 ///
879 /// This is disabled by default.
skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder880 pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
881 if yes {
882 self.skip = stdout_handle().map(Arc::new);
883 } else {
884 self.skip = None;
885 }
886 self
887 }
888
889 /// Yields only entries which satisfy the given predicate and skips
890 /// descending into directories that do not satisfy the given predicate.
891 ///
892 /// The predicate is applied to all entries. If the predicate is
893 /// true, iteration carries on as normal. If the predicate is false, the
894 /// entry is ignored and if it is a directory, it is not descended into.
895 ///
896 /// Note that the errors for reading entries that may not satisfy the
897 /// predicate will still be yielded.
filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder where P: Fn(&DirEntry) -> bool + Send + Sync + 'static,898 pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
899 where
900 P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
901 {
902 self.filter = Some(Filter(Arc::new(filter)));
903 self
904 }
905 }
906
907 /// Walk is a recursive directory iterator over file paths in one or more
908 /// directories.
909 ///
910 /// Only file and directory paths matching the rules are returned. By default,
911 /// ignore files like `.gitignore` are respected. The precise matching rules
912 /// and precedence is explained in the documentation for `WalkBuilder`.
913 pub struct Walk {
914 its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
915 it: Option<WalkEventIter>,
916 ig_root: Ignore,
917 ig: Ignore,
918 max_filesize: Option<u64>,
919 skip: Option<Arc<Handle>>,
920 filter: Option<Filter>,
921 }
922
923 impl Walk {
924 /// Creates a new recursive directory iterator for the file path given.
925 ///
926 /// Note that this uses default settings, which include respecting
927 /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
928 /// instead.
new<P: AsRef<Path>>(path: P) -> Walk929 pub fn new<P: AsRef<Path>>(path: P) -> Walk {
930 WalkBuilder::new(path).build()
931 }
932
skip_entry(&self, ent: &DirEntry) -> Result<bool, Error>933 fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
934 if ent.depth() == 0 {
935 return Ok(false);
936 }
937
938 if let Some(ref stdout) = self.skip {
939 if path_equals(ent, stdout)? {
940 return Ok(true);
941 }
942 }
943 if should_skip_entry(&self.ig, ent) {
944 return Ok(true);
945 }
946 if self.max_filesize.is_some() && !ent.is_dir() {
947 return Ok(skip_filesize(
948 self.max_filesize.unwrap(),
949 ent.path(),
950 &ent.metadata().ok(),
951 ));
952 }
953 if let Some(Filter(filter)) = &self.filter {
954 if !filter(ent) {
955 return Ok(true);
956 }
957 }
958 Ok(false)
959 }
960 }
961
impl Iterator for Walk {
    type Item = Result<DirEntry, Error>;

    /// Yields the next entry (or error) of the walk.
    ///
    /// Each loop iteration pulls one event from the iterator of the current
    /// root. Once that iterator is exhausted (or before the first root has
    /// been started), the next root is taken from `its`. Iteration ends when
    /// there are no more roots.
    #[inline(always)]
    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
        loop {
            let ev = match self.it.as_mut().and_then(|it| it.next()) {
                Some(ev) => ev,
                None => {
                    // No current root, or the current root is exhausted:
                    // move on to the next root.
                    match self.its.next() {
                        None => return None,
                        // A root without an event iterator stands for stdin.
                        Some((_, None)) => {
                            return Some(Ok(DirEntry::new_stdin()));
                        }
                        Some((path, Some(it))) => {
                            self.it = Some(it);
                            // Reset the matcher for the new root. For a
                            // directory root, the matcher is built from the
                            // base matcher plus the root's parents; any
                            // error from doing so is yielded on its own.
                            if path.is_dir() {
                                let (ig, err) = self.ig_root.add_parents(path);
                                self.ig = ig;
                                if let Some(err) = err {
                                    return Some(Err(err));
                                }
                            } else {
                                self.ig = self.ig_root.clone();
                            }
                        }
                    }
                    continue;
                }
            };
            match ev {
                Err(err) => {
                    return Some(Err(Error::from_walkdir(err)));
                }
                Ok(WalkEvent::Exit) => {
                    // Leaving a directory: pop the matcher that was pushed
                    // when its `Dir` event was handled below.
                    self.ig = self.ig.parent().unwrap();
                }
                Ok(WalkEvent::Dir(ent)) => {
                    let mut ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        self.it.as_mut().unwrap().it.skip_current_dir();
                        // Still need to push this on the stack because
                        // we'll get a WalkEvent::Exit event for this dir.
                        // We don't care if it errors though.
                        let (igtmp, _) = self.ig.add_child(ent.path());
                        self.ig = igtmp;
                        continue;
                    }
                    // Entering the directory: push its matcher and attach
                    // any error from building it to the yielded entry.
                    let (igtmp, err) = self.ig.add_child(ent.path());
                    self.ig = igtmp;
                    ent.err = err;
                    return Some(Ok(ent));
                }
                Ok(WalkEvent::File(ent)) => {
                    let ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        continue;
                    }
                    return Some(Ok(ent));
                }
            }
        }
    }
}
1034
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
    /// The depth expected of the next item while the walk is still inside
    /// the most recently entered directory. An item with a smaller depth (or
    /// the end of the underlying iterator) causes `Exit` events to be
    /// emitted, one per level, until the depths agree.
    depth: usize,
    /// The underlying walkdir iterator.
    it: walkdir::IntoIter,
    /// An item already pulled from `it` that is being held back while
    /// `Exit` events are emitted.
    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
}
1044
/// The events emitted by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
    /// A directory entry. A matching `Exit` event follows once the entries
    /// beneath it have been emitted.
    Dir(walkdir::DirEntry),
    /// An entry that is not treated as a directory.
    File(walkdir::DirEntry),
    /// The contents of the most recently entered directory have been
    /// exhausted.
    Exit,
}
1051
1052 impl From<WalkDir> for WalkEventIter {
from(it: WalkDir) -> WalkEventIter1053 fn from(it: WalkDir) -> WalkEventIter {
1054 WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1055 }
1056 }
1057
1058 impl Iterator for WalkEventIter {
1059 type Item = walkdir::Result<WalkEvent>;
1060
1061 #[inline(always)]
next(&mut self) -> Option<walkdir::Result<WalkEvent>>1062 fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1063 let dent = self.next.take().or_else(|| self.it.next());
1064 let depth = match dent {
1065 None => 0,
1066 Some(Ok(ref dent)) => dent.depth(),
1067 Some(Err(ref err)) => err.depth(),
1068 };
1069 if depth < self.depth {
1070 self.depth -= 1;
1071 self.next = dent;
1072 return Some(Ok(WalkEvent::Exit));
1073 }
1074 self.depth = depth;
1075 match dent {
1076 None => None,
1077 Some(Err(err)) => Some(Err(err)),
1078 Some(Ok(dent)) => {
1079 if walkdir_is_dir(&dent) {
1080 self.depth += 1;
1081 Some(Ok(WalkEvent::Dir(dent)))
1082 } else {
1083 Some(Ok(WalkEvent::File(dent)))
1084 }
1085 }
1086 }
1087 }
1088 }
1089
/// WalkState is returned by visitors of the parallel recursive directory
/// iterator to say whether walking should continue as normal, skip
/// descending into a particular directory or stop the walk entirely.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WalkState {
    /// Continue walking as normal.
    Continue,
    /// If the directory entry given is a directory, don't descend into it.
    /// In all other cases, this has no effect.
    Skip,
    /// Quit the entire iterator as soon as possible.
    ///
    /// Note that this is an inherently asynchronous action. It is possible
    /// for more entries to be yielded even after instructing the iterator
    /// to quit.
    Quit,
}

impl WalkState {
    /// Returns true if and only if this state is `Continue`.
    fn is_continue(&self) -> bool {
        match *self {
            WalkState::Continue => true,
            WalkState::Skip | WalkState::Quit => false,
        }
    }

    /// Returns true if and only if this state is `Quit`.
    fn is_quit(&self) -> bool {
        match *self {
            WalkState::Quit => true,
            WalkState::Continue | WalkState::Skip => false,
        }
    }
}
1117
/// A builder for constructing a visitor when using
/// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder
/// will be called for each thread started by `WalkParallel`. The visitor
/// returned from each builder is then called for every directory entry.
///
/// `WalkParallel::visit` additionally calls the builder once before any
/// worker is created; the visitor obtained that way receives errors that
/// occur while preparing the root paths.
pub trait ParallelVisitorBuilder<'s> {
    /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
    fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
}
1126
1127 impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1128 for &'a mut P
1129 {
build(&mut self) -> Box<dyn ParallelVisitor + 's>1130 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1131 (**self).build()
1132 }
1133 }
1134
/// Receives files and directories for the current thread.
///
/// Setup for the traversal can be implemented as part of
/// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build).
/// Teardown when traversal finishes can be implemented by implementing the
/// `Drop` trait on your traversal type.
pub trait ParallelVisitor: Send {
    /// Receives files and directories for the current thread. This is called
    /// once for every directory entry visited by traversal, and also for
    /// every error encountered along the way (as an `Err` value).
    ///
    /// The returned `WalkState` tells the traversal how to proceed.
    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
}
1146
/// Adapts a closure that produces visitor closures into a
/// `ParallelVisitorBuilder`. This is what `WalkParallel::run` passes to
/// `WalkParallel::visit`.
struct FnBuilder<F> {
    /// The closure invoked on every `build` call to create a new visitor
    /// closure.
    builder: F,
}
1150
1151 impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1152 for FnBuilder<F>
1153 {
build(&mut self) -> Box<dyn ParallelVisitor + 's>1154 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1155 let visitor = (self.builder)();
1156 Box::new(FnVisitorImp { visitor })
1157 }
1158 }
1159
/// A boxed visitor closure: it is called with each entry (or error) and
/// returns the `WalkState` that tells the traversal how to proceed.
type FnVisitor<'s> =
    Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1162
/// Adapts a boxed visitor closure into a `ParallelVisitor`.
struct FnVisitorImp<'s> {
    /// The closure that every `visit` call is forwarded to.
    visitor: FnVisitor<'s>,
}
1166
1167 impl<'s> ParallelVisitor for FnVisitorImp<'s> {
visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState1168 fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1169 (self.visitor)(entry)
1170 }
1171 }
1172
/// WalkParallel is a parallel recursive directory iterator over file paths
/// in one or more directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence are explained in the documentation for `WalkBuilder`.
///
/// Unlike `Walk`, this uses multiple threads for traversing a directory.
pub struct WalkParallel {
    /// The root paths to walk. The path `-` is treated as stdin.
    paths: vec::IntoIter<PathBuf>,
    /// The base ignore matcher that every root starts out with.
    ig_root: Ignore,
    /// When set, non-directory entries larger than this many bytes are
    /// skipped.
    max_filesize: Option<u64>,
    /// When set, directories at this depth or deeper are not descended into.
    max_depth: Option<usize>,
    /// Whether symbolic links found during traversal are followed.
    follow_links: bool,
    /// Whether traversal is confined to the device of each root path.
    same_file_system: bool,
    /// The number of worker threads to use. A value of `0` currently results
    /// in two threads.
    threads: usize,
    /// When set, entries that are the same file as this handle are skipped.
    skip: Option<Arc<Handle>>,
    /// When set, entries for which this predicate returns false are skipped.
    filter: Option<Filter>,
}
1192
1193 impl WalkParallel {
1194 /// Execute the parallel recursive directory iterator. `mkf` is called
1195 /// for each thread used for iteration. The function produced by `mkf`
1196 /// is then in turn called for each visited file path.
run<'s, F>(self, mkf: F) where F: FnMut() -> FnVisitor<'s>,1197 pub fn run<'s, F>(self, mkf: F)
1198 where
1199 F: FnMut() -> FnVisitor<'s>,
1200 {
1201 self.visit(&mut FnBuilder { builder: mkf })
1202 }
1203
1204 /// Execute the parallel recursive directory iterator using a custom
1205 /// visitor.
1206 ///
1207 /// The builder given is used to construct a visitor for every thread
1208 /// used by this traversal. The visitor returned from each builder is then
1209 /// called for every directory entry seen by that thread.
1210 ///
1211 /// Typically, creating a custom visitor is useful if you need to perform
1212 /// some kind of cleanup once traversal is finished. This can be achieved
1213 /// by implementing `Drop` for your builder (or for your visitor, if you
1214 /// want to execute cleanup for every thread that is launched).
1215 ///
1216 /// For example, each visitor might build up a data structure of results
1217 /// corresponding to the directory entries seen for each thread. Since each
1218 /// visitor runs on only one thread, this build-up can be done without
1219 /// synchronization. Then, once traversal is complete, all of the results
1220 /// can be merged together into a single data structure.
visit(mut self, builder: &mut dyn ParallelVisitorBuilder)1221 pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder) {
1222 let threads = self.threads();
1223 let stack = Arc::new(Mutex::new(vec![]));
1224 {
1225 let mut stack = stack.lock().unwrap();
1226 let mut visitor = builder.build();
1227 let mut paths = Vec::new().into_iter();
1228 std::mem::swap(&mut paths, &mut self.paths);
1229 // Send the initial set of root paths to the pool of workers. Note
1230 // that we only send directories. For files, we send to them the
1231 // callback directly.
1232 for path in paths {
1233 let (dent, root_device) = if path == Path::new("-") {
1234 (DirEntry::new_stdin(), None)
1235 } else {
1236 let root_device = if !self.same_file_system {
1237 None
1238 } else {
1239 match device_num(&path) {
1240 Ok(root_device) => Some(root_device),
1241 Err(err) => {
1242 let err = Error::Io(err).with_path(path);
1243 if visitor.visit(Err(err)).is_quit() {
1244 return;
1245 }
1246 continue;
1247 }
1248 }
1249 };
1250 match DirEntryRaw::from_path(0, path, false) {
1251 Ok(dent) => {
1252 (DirEntry::new_raw(dent, None), root_device)
1253 }
1254 Err(err) => {
1255 if visitor.visit(Err(err)).is_quit() {
1256 return;
1257 }
1258 continue;
1259 }
1260 }
1261 };
1262 stack.push(Message::Work(Work {
1263 dent: dent,
1264 ignore: self.ig_root.clone(),
1265 root_device: root_device,
1266 }));
1267 }
1268 // ... but there's no need to start workers if we don't need them.
1269 if stack.is_empty() {
1270 return;
1271 }
1272 }
1273 // Create the workers and then wait for them to finish.
1274 let quit_now = Arc::new(AtomicBool::new(false));
1275 let num_pending =
1276 Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
1277 crossbeam_utils::thread::scope(|s| {
1278 let mut handles = vec![];
1279 for _ in 0..threads {
1280 let worker = Worker {
1281 visitor: builder.build(),
1282 stack: stack.clone(),
1283 quit_now: quit_now.clone(),
1284 num_pending: num_pending.clone(),
1285 max_depth: self.max_depth,
1286 max_filesize: self.max_filesize,
1287 follow_links: self.follow_links,
1288 skip: self.skip.clone(),
1289 filter: self.filter.clone(),
1290 };
1291 handles.push(s.spawn(|_| worker.run()));
1292 }
1293 for handle in handles {
1294 handle.join().unwrap();
1295 }
1296 })
1297 .unwrap(); // Pass along panics from threads
1298 }
1299
threads(&self) -> usize1300 fn threads(&self) -> usize {
1301 if self.threads == 0 {
1302 2
1303 } else {
1304 self.threads
1305 }
1306 }
1307 }
1308
/// Message is the set of instructions that a worker knows how to process.
enum Message {
    /// A work item corresponds to an entry that should be processed: a
    /// directory to descend into, or any other entry to hand to the visitor.
    /// Work items for entries that should be skipped or ignored should not
    /// be produced.
    Work(Work),
    /// This instruction indicates that the worker should quit.
    Quit,
}
1318
/// A unit of work for each worker to process.
///
/// Each unit of work corresponds to one entry: either a directory that
/// should be descended into, or an entry that is passed to the visitor
/// as is.
struct Work {
    /// The directory entry.
    dent: DirEntry,
    /// Any ignore matchers that have been built for this directory's parents.
    ignore: Ignore,
    /// The root device number. When present, only files with the same device
    /// number should be considered.
    root_device: Option<u64>,
}
1332
1333 impl Work {
1334 /// Returns true if and only if this work item is a directory.
is_dir(&self) -> bool1335 fn is_dir(&self) -> bool {
1336 self.dent.is_dir()
1337 }
1338
1339 /// Returns true if and only if this work item is a symlink.
is_symlink(&self) -> bool1340 fn is_symlink(&self) -> bool {
1341 self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1342 }
1343
1344 /// Adds ignore rules for parent directories.
1345 ///
1346 /// Note that this only applies to entries at depth 0. On all other
1347 /// entries, this is a no-op.
add_parents(&mut self) -> Option<Error>1348 fn add_parents(&mut self) -> Option<Error> {
1349 if self.dent.depth() > 0 {
1350 return None;
1351 }
1352 // At depth 0, the path of this entry is a root path, so we can
1353 // use it directly to add parent ignore rules.
1354 let (ig, err) = self.ignore.add_parents(self.dent.path());
1355 self.ignore = ig;
1356 err
1357 }
1358
1359 /// Reads the directory contents of this work item and adds ignore
1360 /// rules for this directory.
1361 ///
1362 /// If there was a problem with reading the directory contents, then
1363 /// an error is returned. If there was a problem reading the ignore
1364 /// rules for this directory, then the error is attached to this
1365 /// work item's directory entry.
read_dir(&mut self) -> Result<fs::ReadDir, Error>1366 fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1367 let readdir = match fs::read_dir(self.dent.path()) {
1368 Ok(readdir) => readdir,
1369 Err(err) => {
1370 let err = Error::from(err)
1371 .with_path(self.dent.path())
1372 .with_depth(self.dent.depth());
1373 return Err(err);
1374 }
1375 };
1376 let (ig, err) = self.ignore.add_child(self.dent.path());
1377 self.ignore = ig;
1378 self.dent.err = err;
1379 Ok(readdir)
1380 }
1381 }
1382
/// A worker is responsible for descending into directories, updating the
/// ignore matchers, producing new work and invoking the caller's callback.
///
/// Note that a worker is *both* a producer and a consumer.
struct Worker<'s> {
    /// The caller's callback.
    visitor: Box<dyn ParallelVisitor + 's>,
    /// A stack of work to do.
    ///
    /// We use a stack instead of a channel because a stack lets us visit
    /// directories in depth first order. This can substantially reduce peak
    /// memory usage by keeping both the number of file paths and gitignore
    /// matchers in memory lower.
    stack: Arc<Mutex<Vec<Message>>>,
    /// Whether all workers should terminate at the next opportunity. Note
    /// that we need this because we don't want other `Work` to be done after
    /// we quit. We wouldn't need this if we had a priority channel.
    quit_now: Arc<AtomicBool>,
    /// The number of outstanding work items.
    num_pending: Arc<AtomicUsize>,
    /// The maximum depth of directories to descend. A value of `0` means no
    /// descension at all.
    max_depth: Option<usize>,
    /// The maximum size a searched file can be (in bytes). If a file exceeds
    /// this size it will be skipped.
    max_filesize: Option<u64>,
    /// Whether to follow symbolic links or not. When this is enabled, loop
    /// detection is performed.
    follow_links: bool,
    /// A file handle to skip, currently is either `None` or stdout, if it's
    /// a file and it has been requested to skip files identical to stdout.
    skip: Option<Arc<Handle>>,
    /// A predicate applied to dir entries. If it returns false, the entry
    /// and all of its children will be skipped.
    filter: Option<Filter>,
}
1419
1420 impl<'s> Worker<'s> {
1421 /// Runs this worker until there is no more work left to do.
1422 ///
1423 /// The worker will call the caller's callback for all entries that aren't
1424 /// skipped by the ignore matcher.
run(mut self)1425 fn run(mut self) {
1426 while let Some(work) = self.get_work() {
1427 if let WalkState::Quit = self.run_one(work) {
1428 self.quit_now();
1429 }
1430 self.work_done();
1431 }
1432 }
1433
run_one(&mut self, mut work: Work) -> WalkState1434 fn run_one(&mut self, mut work: Work) -> WalkState {
1435 // If the work is not a directory, then we can just execute the
1436 // caller's callback immediately and move on.
1437 if work.is_symlink() || !work.is_dir() {
1438 return self.visitor.visit(Ok(work.dent));
1439 }
1440 if let Some(err) = work.add_parents() {
1441 let state = self.visitor.visit(Err(err));
1442 if state.is_quit() {
1443 return state;
1444 }
1445 }
1446
1447 let descend = if let Some(root_device) = work.root_device {
1448 match is_same_file_system(root_device, work.dent.path()) {
1449 Ok(true) => true,
1450 Ok(false) => false,
1451 Err(err) => {
1452 let state = self.visitor.visit(Err(err));
1453 if state.is_quit() {
1454 return state;
1455 }
1456 false
1457 }
1458 }
1459 } else {
1460 true
1461 };
1462
1463 // Try to read the directory first before we transfer ownership
1464 // to the provided closure. Do not unwrap it immediately, though,
1465 // as we may receive an `Err` value e.g. in the case when we do not
1466 // have sufficient read permissions to list the directory.
1467 // In that case we still want to provide the closure with a valid
1468 // entry before passing the error value.
1469 let readdir = work.read_dir();
1470 let depth = work.dent.depth();
1471 let state = self.visitor.visit(Ok(work.dent));
1472 if !state.is_continue() {
1473 return state;
1474 }
1475 if !descend {
1476 return WalkState::Skip;
1477 }
1478
1479 let readdir = match readdir {
1480 Ok(readdir) => readdir,
1481 Err(err) => {
1482 return self.visitor.visit(Err(err));
1483 }
1484 };
1485
1486 if self.max_depth.map_or(false, |max| depth >= max) {
1487 return WalkState::Skip;
1488 }
1489 for result in readdir {
1490 let state = self.generate_work(
1491 &work.ignore,
1492 depth + 1,
1493 work.root_device,
1494 result,
1495 );
1496 if state.is_quit() {
1497 return state;
1498 }
1499 }
1500 WalkState::Continue
1501 }
1502
1503 /// Decides whether to submit the given directory entry as a file to
1504 /// search.
1505 ///
1506 /// If the entry is a path that should be ignored, then this is a no-op.
1507 /// Otherwise, the entry is pushed on to the queue. (The actual execution
1508 /// of the callback happens in `run_one`.)
1509 ///
1510 /// If an error occurs while reading the entry, then it is sent to the
1511 /// caller's callback.
1512 ///
1513 /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
1514 /// be the depth of this entry. `result` should be the item yielded by
1515 /// a directory iterator.
generate_work( &mut self, ig: &Ignore, depth: usize, root_device: Option<u64>, result: Result<fs::DirEntry, io::Error>, ) -> WalkState1516 fn generate_work(
1517 &mut self,
1518 ig: &Ignore,
1519 depth: usize,
1520 root_device: Option<u64>,
1521 result: Result<fs::DirEntry, io::Error>,
1522 ) -> WalkState {
1523 let fs_dent = match result {
1524 Ok(fs_dent) => fs_dent,
1525 Err(err) => {
1526 return self
1527 .visitor
1528 .visit(Err(Error::from(err).with_depth(depth)));
1529 }
1530 };
1531 let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
1532 Ok(dent) => DirEntry::new_raw(dent, None),
1533 Err(err) => {
1534 return self.visitor.visit(Err(err));
1535 }
1536 };
1537 let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
1538 if self.follow_links && is_symlink {
1539 let path = dent.path().to_path_buf();
1540 dent = match DirEntryRaw::from_path(depth, path, true) {
1541 Ok(dent) => DirEntry::new_raw(dent, None),
1542 Err(err) => {
1543 return self.visitor.visit(Err(err));
1544 }
1545 };
1546 if dent.is_dir() {
1547 if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
1548 return self.visitor.visit(Err(err));
1549 }
1550 }
1551 }
1552 if let Some(ref stdout) = self.skip {
1553 let is_stdout = match path_equals(&dent, stdout) {
1554 Ok(is_stdout) => is_stdout,
1555 Err(err) => return self.visitor.visit(Err(err)),
1556 };
1557 if is_stdout {
1558 return WalkState::Continue;
1559 }
1560 }
1561 let should_skip_path = should_skip_entry(ig, &dent);
1562 let should_skip_filesize =
1563 if self.max_filesize.is_some() && !dent.is_dir() {
1564 skip_filesize(
1565 self.max_filesize.unwrap(),
1566 dent.path(),
1567 &dent.metadata().ok(),
1568 )
1569 } else {
1570 false
1571 };
1572 let should_skip_filtered =
1573 if let Some(Filter(predicate)) = &self.filter {
1574 !predicate(&dent)
1575 } else {
1576 false
1577 };
1578 if !should_skip_path && !should_skip_filesize && !should_skip_filtered
1579 {
1580 self.send(Work { dent, ignore: ig.clone(), root_device });
1581 }
1582 WalkState::Continue
1583 }
1584
/// Returns the next work item to process.
///
/// If all work has been exhausted, or if a quit has been requested, then
/// this returns None. The worker should then subsequently quit.
///
/// When the stack is empty but other work items are still outstanding,
/// this polls the stack, sleeping briefly between attempts.
fn get_work(&mut self) -> Option<Work> {
    let mut value = self.recv();
    loop {
        // Simulate a priority channel: If quit_now flag is set, we can
        // receive only quit messages.
        if self.is_quit_now() {
            value = Some(Message::Quit)
        }
        match value {
            Some(Message::Work(work)) => {
                return Some(work);
            }
            Some(Message::Quit) => {
                // Repeat quit message to wake up sleeping threads, if
                // any. The domino effect will ensure that every thread
                // will quit.
                self.send_quit();
                return None;
            }
            None => {
                // Once num_pending reaches 0, it is impossible for it to
                // ever increase again. Namely, it only reaches 0 once
                // all jobs have run such that no jobs have produced more
                // work. We have this guarantee because num_pending is
                // always incremented before each job is submitted and only
                // decremented once each job is completely finished.
                // Therefore, if this reaches zero, then there can be no
                // other job running.
                if self.num_pending() == 0 {
                    // Every other thread is waiting in the polling loop
                    // below (or is about to). Send the initial quit
                    // message and quit.
                    self.send_quit();
                    return None;
                }
                // Wait for next `Work` or `Quit` message.
                loop {
                    if let Some(v) = self.recv() {
                        value = Some(v);
                        break;
                    }
                    // Our stack isn't blocking. Instead of burning the
                    // CPU waiting, we let the thread sleep for a bit. In
                    // general, this tends to only occur once the search is
                    // approaching termination.
                    thread::sleep(Duration::from_millis(1));
                }
            }
        }
    }
}
1639
1640 /// Indicates that all workers should quit immediately.
quit_now(&self)1641 fn quit_now(&self) {
1642 self.quit_now.store(true, Ordering::SeqCst);
1643 }
1644
1645 /// Returns true if this worker should quit immediately.
is_quit_now(&self) -> bool1646 fn is_quit_now(&self) -> bool {
1647 self.quit_now.load(Ordering::SeqCst)
1648 }
1649
1650 /// Returns the number of pending jobs.
num_pending(&self) -> usize1651 fn num_pending(&self) -> usize {
1652 self.num_pending.load(Ordering::SeqCst)
1653 }
1654
1655 /// Send work.
send(&self, work: Work)1656 fn send(&self, work: Work) {
1657 self.num_pending.fetch_add(1, Ordering::SeqCst);
1658 let mut stack = self.stack.lock().unwrap();
1659 stack.push(Message::Work(work));
1660 }
1661
1662 /// Send a quit message.
send_quit(&self)1663 fn send_quit(&self) {
1664 let mut stack = self.stack.lock().unwrap();
1665 stack.push(Message::Quit);
1666 }
1667
1668 /// Receive work.
recv(&self) -> Option<Message>1669 fn recv(&self) -> Option<Message> {
1670 let mut stack = self.stack.lock().unwrap();
1671 stack.pop()
1672 }
1673
1674 /// Signal that work has been received.
work_done(&self)1675 fn work_done(&self) {
1676 self.num_pending.fetch_sub(1, Ordering::SeqCst);
1677 }
1678 }
1679
check_symlink_loop( ig_parent: &Ignore, child_path: &Path, child_depth: usize, ) -> Result<(), Error>1680 fn check_symlink_loop(
1681 ig_parent: &Ignore,
1682 child_path: &Path,
1683 child_depth: usize,
1684 ) -> Result<(), Error> {
1685 let hchild = Handle::from_path(child_path).map_err(|err| {
1686 Error::from(err).with_path(child_path).with_depth(child_depth)
1687 })?;
1688 for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
1689 let h = Handle::from_path(ig.path()).map_err(|err| {
1690 Error::from(err).with_path(child_path).with_depth(child_depth)
1691 })?;
1692 if hchild == h {
1693 return Err(Error::Loop {
1694 ancestor: ig.path().to_path_buf(),
1695 child: child_path.to_path_buf(),
1696 }
1697 .with_depth(child_depth));
1698 }
1699 }
1700 Ok(())
1701 }
1702
1703 // Before calling this function, make sure that you ensure that is really
1704 // necessary as the arguments imply a file stat.
skip_filesize( max_filesize: u64, path: &Path, ent: &Option<Metadata>, ) -> bool1705 fn skip_filesize(
1706 max_filesize: u64,
1707 path: &Path,
1708 ent: &Option<Metadata>,
1709 ) -> bool {
1710 let filesize = match *ent {
1711 Some(ref md) => Some(md.len()),
1712 None => None,
1713 };
1714
1715 if let Some(fs) = filesize {
1716 if fs > max_filesize {
1717 debug!("ignoring {}: {} bytes", path.display(), fs);
1718 true
1719 } else {
1720 false
1721 }
1722 } else {
1723 false
1724 }
1725 }
1726
should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool1727 fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1728 let m = ig.matched_dir_entry(dent);
1729 if m.is_ignore() {
1730 debug!("ignoring {}: {:?}", dent.path().display(), m);
1731 true
1732 } else if m.is_whitelist() {
1733 debug!("whitelisting {}: {:?}", dent.path().display(), m);
1734 false
1735 } else {
1736 false
1737 }
1738 }
1739
1740 /// Returns a handle to stdout for filtering search.
1741 ///
1742 /// A handle is returned if and only if stdout is being redirected to a file.
1743 /// The handle returned corresponds to that file.
1744 ///
1745 /// This can be used to ensure that we do not attempt to search a file that we
1746 /// may also be writing to.
stdout_handle() -> Option<Handle>1747 fn stdout_handle() -> Option<Handle> {
1748 let h = match Handle::stdout() {
1749 Err(_) => return None,
1750 Ok(h) => h,
1751 };
1752 let md = match h.as_file().metadata() {
1753 Err(_) => return None,
1754 Ok(md) => md,
1755 };
1756 if !md.is_file() {
1757 return None;
1758 }
1759 Some(h)
1760 }
1761
1762 /// Returns true if and only if the given directory entry is believed to be
1763 /// equivalent to the given handle. If there was a problem querying the path
1764 /// for information to determine equality, then that error is returned.
path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error>1765 fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1766 #[cfg(unix)]
1767 fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1768 dent.ino() != Some(handle.ino())
1769 }
1770
1771 #[cfg(not(unix))]
1772 fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1773 false
1774 }
1775
1776 // If we know for sure that these two things aren't equal, then avoid
1777 // the costly extra stat call to determine equality.
1778 if dent.is_stdin() || never_equal(dent, handle) {
1779 return Ok(false);
1780 }
1781 Handle::from_path(dent.path())
1782 .map(|h| &h == handle)
1783 .map_err(|err| Error::Io(err).with_path(dent.path()))
1784 }
1785
1786 /// Returns true if the given walkdir entry corresponds to a directory.
1787 ///
1788 /// This is normally just `dent.file_type().is_dir()`, but when we aren't
1789 /// following symlinks, the root directory entry may be a symlink to a
1790 /// directory that we *do* follow---by virtue of it being specified by the user
1791 /// explicitly. In that case, we need to follow the symlink and query whether
1792 /// it's a directory or not. But we only do this for root entries to avoid an
1793 /// additional stat check in most cases.
walkdir_is_dir(dent: &walkdir::DirEntry) -> bool1794 fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
1795 if dent.file_type().is_dir() {
1796 return true;
1797 }
1798 if !dent.file_type().is_symlink() || dent.depth() > 0 {
1799 return false;
1800 }
1801 dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir())
1802 }
1803
1804 /// Returns true if and only if the given path is on the same device as the
1805 /// given root device.
is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error>1806 fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1807 let dent_device =
1808 device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
1809 Ok(root_device == dent_device)
1810 }
1811
/// Returns the device number reported by the metadata of the given path.
#[cfg(unix)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use std::os::unix::fs::MetadataExt;

    let md = path.as_ref().metadata()?;
    Ok(md.dev())
}

/// Returns the volume serial number of the given path, which serves as the
/// device number on Windows.
#[cfg(windows)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use winapi_util::{file, Handle};

    let info = file::information(Handle::from_path_any(path)?)?;
    Ok(info.volume_serial_number())
}

/// Always fails: device numbers are not available on this platform.
#[cfg(not(any(unix, windows)))]
fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
    let unsupported = io::Error::new(
        io::ErrorKind::Other,
        "walkdir: same_file_system option not supported on this platform",
    );
    Err(unsupported)
}
1834
1835 #[cfg(test)]
1836 mod tests {
1837 use std::ffi::OsStr;
1838 use std::fs::{self, File};
1839 use std::io::Write;
1840 use std::path::Path;
1841 use std::sync::{Arc, Mutex};
1842
1843 use super::{DirEntry, WalkBuilder, WalkState};
1844 use tests::TempDir;
1845
/// Creates (or truncates) the file at `path` and writes `contents` to it,
/// panicking if either step fails.
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
    File::create(path).unwrap().write_all(contents.as_bytes()).unwrap();
}
1850
/// Creates (or truncates) the file at `path` and sets its length to `size`
/// bytes, panicking if either step fails.
fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
    File::create(path).unwrap().set_len(size).unwrap();
}
1855
/// Creates a symbolic link at `dst` that points to `src`, panicking on
/// failure. Only available on Unix.
#[cfg(unix)]
fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
    std::os::unix::fs::symlink(src, dst).unwrap();
}
1861
/// Creates the directory at `path` along with any missing parent
/// directories, panicking on failure.
fn mkdirp<P: AsRef<Path>>(path: P) {
    let created = fs::create_dir_all(path);
    created.unwrap();
}
1865
/// Normalizes a path string for comparison against expected paths that are
/// written with forward slashes: on Windows, backslashes are replaced with
/// forward slashes; elsewhere the string is returned unchanged.
fn normal_path(unix: &str) -> String {
    if !cfg!(windows) {
        return unix.to_string();
    }
    unix.replace("\\", "/")
}
1873
1874 fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
1875 let mut paths = vec![];
1876 for result in builder.build() {
1877 let dent = match result {
1878 Err(_) => continue,
1879 Ok(dent) => dent,
1880 };
1881 let path = dent.path().strip_prefix(prefix).unwrap();
1882 if path.as_os_str().is_empty() {
1883 continue;
1884 }
1885 paths.push(normal_path(path.to_str().unwrap()));
1886 }
1887 paths.sort();
1888 paths
1889 }
1890
1891 fn walk_collect_parallel(
1892 prefix: &Path,
1893 builder: &WalkBuilder,
1894 ) -> Vec<String> {
1895 let mut paths = vec![];
1896 for dent in walk_collect_entries_parallel(builder) {
1897 let path = dent.path().strip_prefix(prefix).unwrap();
1898 if path.as_os_str().is_empty() {
1899 continue;
1900 }
1901 paths.push(normal_path(path.to_str().unwrap()));
1902 }
1903 paths.sort();
1904 paths
1905 }
1906
1907 fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
1908 let dents = Arc::new(Mutex::new(vec![]));
1909 builder.build_parallel().run(|| {
1910 let dents = dents.clone();
1911 Box::new(move |result| {
1912 if let Ok(dent) = result {
1913 dents.lock().unwrap().push(dent);
1914 }
1915 WalkState::Continue
1916 })
1917 });
1918
1919 let dents = dents.lock().unwrap();
1920 dents.to_vec()
1921 }
1922
/// Converts a list of expected path strings into owned `String`s in sorted
/// order, so they can be compared with the sorted walker output.
fn mkpaths(paths: &[&str]) -> Vec<String> {
    let mut sorted = Vec::with_capacity(paths.len());
    for &path in paths {
        sorted.push(String::from(path));
    }
    sorted.sort();
    sorted
}
1928
1929 fn tmpdir() -> TempDir {
1930 TempDir::new().unwrap()
1931 }
1932
1933 fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
1934 let got = walk_collect(prefix, builder);
1935 assert_eq!(got, mkpaths(expected), "single threaded");
1936 let got = walk_collect_parallel(prefix, builder);
1937 assert_eq!(got, mkpaths(expected), "parallel");
1938 }
1939
// With no ignore files present, every directory and file is reported.
#[test]
fn no_ignores() {
    let td = tmpdir();
    let root = td.path();
    for dir in &["a/b/c", "x/y"] {
        mkdirp(root.join(dir));
    }
    for file in &["a/b/foo", "x/y/foo"] {
        wfile(root.join(file), "");
    }

    let builder = WalkBuilder::new(root);
    assert_paths(
        root,
        &builder,
        &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
    );
}
1954
// A custom ignore file name registered on the builder is honored in
// addition to the default filters.
#[test]
fn custom_ignore() {
    let ignore_name = ".customignore";
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a"));
    wfile(root.join(ignore_name), "foo");
    for file in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(file), "");
    }

    let mut builder = WalkBuilder::new(root);
    builder.add_custom_ignore_filename(&ignore_name);
    assert_paths(root, &builder, &["bar", "a", "a/bar"]);
}
1970
// A custom ignore file name still applies when the ignore/git-related
// filters are all switched off.
#[test]
fn custom_ignore_exclusive_use() {
    let ignore_name = ".customignore";
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a"));
    wfile(root.join(ignore_name), "foo");
    for file in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(file), "");
    }

    let mut builder = WalkBuilder::new(root);
    // Disable the built-in ignore sources so only the custom file is used.
    builder.ignore(false);
    builder.git_ignore(false);
    builder.git_global(false);
    builder.git_exclude(false);
    builder.add_custom_ignore_filename(&ignore_name);
    assert_paths(root, &builder, &["bar", "a", "a/bar"]);
}
1990
// A `.gitignore` next to a `.git` directory hides matching files at every
// depth (and the hidden `.git`/`.gitignore` entries are not reported).
#[test]
fn gitignore() {
    let td = tmpdir();
    let root = td.path();
    for dir in &[".git", "a"] {
        mkdirp(root.join(dir));
    }
    wfile(root.join(".gitignore"), "foo");
    for file in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(file), "");
    }

    let builder = WalkBuilder::new(root);
    assert_paths(root, &builder, &["bar", "a", "a/bar"]);
}
2008
// An ignore file added explicitly via `add_ignore` is applied even though
// its name is not one of the recognized ignore file names.
#[test]
fn explicit_ignore() {
    let td = tmpdir();
    let root = td.path();
    let igpath = root.join(".not-an-ignore");
    mkdirp(root.join("a"));
    wfile(&igpath, "foo");
    for file in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(file), "");
    }

    let mut builder = WalkBuilder::new(root);
    let add_result = builder.add_ignore(&igpath);
    assert!(add_result.is_none());
    assert_paths(root, &builder, &["bar", "a", "a/bar"]);
}
2024
// An explicitly added ignore file still applies with the standard filters
// disabled; the (hidden) ignore file itself then shows up in the results.
#[test]
fn explicit_ignore_exclusive_use() {
    let td = tmpdir();
    let root = td.path();
    let igpath = root.join(".not-an-ignore");
    mkdirp(root.join("a"));
    wfile(&igpath, "foo");
    for file in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(file), "");
    }

    let mut builder = WalkBuilder::new(root);
    builder.standard_filters(false);
    let add_result = builder.add_ignore(&igpath);
    assert!(add_result.is_none());
    assert_paths(root, &builder, &[".not-an-ignore", "bar", "a", "a/bar"]);
}
2045
// A `.gitignore` in a parent of the walk root still applies to the walk.
#[test]
fn gitignore_parent() {
    let td = tmpdir();
    let base = td.path();
    for dir in &[".git", "a"] {
        mkdirp(base.join(dir));
    }
    wfile(base.join(".gitignore"), "foo");
    for file in &["a/foo", "a/bar"] {
        wfile(base.join(file), "");
    }

    // Start the walk one level below the directory holding `.gitignore`.
    let root = base.join("a");
    let builder = WalkBuilder::new(&root);
    assert_paths(&root, &builder, &["bar"]);
}
2058
// `max_depth` limits how far below the root the walker descends.
#[test]
fn max_depth() {
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b/c"));
    for file in &["foo", "a/foo", "a/b/foo", "a/b/c/foo"] {
        wfile(root.join(file), "");
    }

    let mut builder = WalkBuilder::new(root);

    // No limit: everything is visited.
    assert_paths(
        root,
        &builder,
        &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
    );
    // Depth 0 yields only the root, which the helpers filter out.
    assert_paths(root, builder.max_depth(Some(0)), &[]);
    assert_paths(root, builder.max_depth(Some(1)), &["a", "foo"]);
    assert_paths(
        root,
        builder.max_depth(Some(2)),
        &["a", "a/b", "foo", "a/foo"],
    );
}
2082
// `max_filesize` drops files larger than the limit; directories are always
// reported.
#[test]
fn max_filesize() {
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b"));
    let sized_files: &[(&str, u64)] = &[
        ("foo", 0),
        ("bar", 400),
        ("baz", 600),
        ("a/foo", 600),
        ("a/bar", 500),
        ("a/baz", 200),
    ];
    for &(name, size) in sized_files {
        wfile_size(root.join(name), size);
    }

    let everything =
        ["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"];

    let mut builder = WalkBuilder::new(root);
    // No limit configured.
    assert_paths(root, &builder, &everything);
    // Only empty files pass a limit of zero.
    assert_paths(
        root,
        builder.max_filesize(Some(0)),
        &["a", "a/b", "foo"],
    );
    // The limit is inclusive: the 500-byte file is kept.
    assert_paths(
        root,
        builder.max_filesize(Some(500)),
        &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
    );
    // A limit above every file size keeps everything.
    assert_paths(root, builder.max_filesize(Some(50000)), &everything);
}
2116
// A symlinked directory is reported but only descended into when
// `follow_links` is enabled.
#[cfg(unix)] // because symlinks on windows are weird
#[test]
fn symlinks() {
    let td = tmpdir();
    let root = td.path();
    let target = root.join("a/b");
    mkdirp(&target);
    symlink(&target, root.join("z"));
    wfile(target.join("foo"), "");

    let mut builder = WalkBuilder::new(root);
    assert_paths(root, &builder, &["a", "a/b", "a/b/foo", "z"]);

    builder.follow_links(true);
    assert_paths(root, &builder, &["a", "a/b", "a/b/foo", "z", "z/foo"]);
}
2133
// The root entry of a walk over a plain directory must not be flagged as a
// symlink, for both the single-threaded and the parallel walker.
#[cfg(unix)] // because symlinks on windows are weird
#[test]
fn first_path_not_symlink() {
    let td = tmpdir();
    let root = td.path().join("foo");
    mkdirp(&root);

    let serial = WalkBuilder::new(&root)
        .build()
        .into_iter()
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(1, serial.len());
    assert!(!serial[0].path_is_symlink());

    let parallel_builder = WalkBuilder::new(&root);
    let parallel = walk_collect_entries_parallel(&parallel_builder);
    assert_eq!(1, parallel.len());
    assert!(!parallel[0].path_is_symlink());
}
2154
// A symlink that points back to one of its own ancestors is reported when
// links are not followed, and is not reported once they are.
#[cfg(unix)] // because symlinks on windows are weird
#[test]
fn symlink_loop() {
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b"));
    // `a/b/c` points back up to `a`, forming a cycle.
    symlink(root.join("a"), root.join("a/b/c"));

    let mut builder = WalkBuilder::new(root);
    assert_paths(root, &builder, &["a", "a/b", "a/b/c"]);

    builder.follow_links(true);
    assert_paths(root, &builder, &["a", "a/b"]);
}
2166
2167 // It's a little tricky to test the 'same_file_system' option since
2168 // we need an environment with more than one file system. We adopt a
2169 // heuristic where /sys is typically a distinct volume on Linux and roll
2170 // with that.
#[test]
#[cfg(target_os = "linux")]
fn same_file_system() {
    use super::device_num;

    // Nothing to test against when /sys is missing or isn't a directory.
    let sys = Path::new("/sys");
    if !sys.is_dir() {
        return;
    }

    // The test only means something when the temporary directory and /sys
    // live on different devices; bail out otherwise.
    let td = tmpdir();
    let tmp_device = device_num(td.path()).unwrap();
    let sys_device = device_num(sys).unwrap();
    if tmp_device == sys_device {
        return;
    }

    // Link to /sys from inside the temporary tree and walk with symlink
    // following enabled. Should `same_file_system` not work, the walk would
    // descend into /sys (and likely hit permission errors); when it works,
    // the link is reported but never descended into.
    let link_parent = td.path().join("same_file");
    mkdirp(&link_parent);
    symlink(sys, link_parent.join("alink"));

    let mut builder = WalkBuilder::new(td.path());
    builder.follow_links(true).same_file_system(true);
    assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
}
2200
// Walking a directory we are not allowed to read must still yield an entry
// for the directory itself, without descending into it.
#[cfg(target_os = "linux")]
#[test]
fn no_read_permissions() {
    let dir_path = Path::new("/root");

    // There's no /root directory, skip the test.
    if !dir_path.is_dir() {
        return;
    }
    // We can list /root (e.g. because we're running as root), so the test
    // won't check what we want it to.
    if fs::read_dir(dir_path).is_ok() {
        return;
    }

    // Check that we can't descend but get an entry for the parent dir.
    let builder = WalkBuilder::new(dir_path);
    assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
}
2219
// `filter_entry` prunes entries rejected by the predicate, including
// everything below a rejected directory.
#[test]
fn filter() {
    let td = tmpdir();
    let root = td.path();
    for dir in &["a/b/c", "x/y"] {
        mkdirp(root.join(dir));
    }
    for file in &["a/b/foo", "x/y/foo"] {
        wfile(root.join(file), "");
    }

    // Baseline: without a filter everything is visited.
    let unfiltered = WalkBuilder::new(root);
    assert_paths(
        root,
        &unfiltered,
        &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
    );

    // Rejecting the entry named `a` removes it and its whole subtree.
    let mut filtered = WalkBuilder::new(root);
    filtered.filter_entry(|entry| entry.file_name() != OsStr::new("a"));
    assert_paths(root, &filtered, &["x", "x/y", "x/y/foo"]);
}
2241 }
2242