1 use std::cmp;
2 use std::ffi::OsStr;
3 use std::fmt;
4 use std::fs::{self, FileType, Metadata};
5 use std::io;
6 use std::path::{Path, PathBuf};
7 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
8 use std::sync::{Arc, Mutex};
9 use std::thread;
10 use std::time::Duration;
11 use std::vec;
12 
13 use same_file::Handle;
14 use walkdir::{self, WalkDir};
15 
16 use dir::{Ignore, IgnoreBuilder};
17 use gitignore::GitignoreBuilder;
18 use overrides::Override;
19 use types::Types;
20 use {Error, PartialErrorBuilder};
21 
/// A directory entry with a possible error attached.
///
/// The error typically refers to a problem parsing ignore files in a
/// particular directory.
#[derive(Clone, Debug)]
pub struct DirEntry {
    /// The underlying entry. See `DirEntryInner` for the possible sources.
    dent: DirEntryInner,
    /// An error associated with this entry, exposed through `error()`.
    err: Option<Error>,
}
31 
32 impl DirEntry {
33     /// The full path that this entry represents.
path(&self) -> &Path34     pub fn path(&self) -> &Path {
35         self.dent.path()
36     }
37 
38     /// The full path that this entry represents.
39     /// Analogous to [`path`], but moves ownership of the path.
40     ///
41     /// [`path`]: struct.DirEntry.html#method.path
into_path(self) -> PathBuf42     pub fn into_path(self) -> PathBuf {
43         self.dent.into_path()
44     }
45 
46     /// Whether this entry corresponds to a symbolic link or not.
path_is_symlink(&self) -> bool47     pub fn path_is_symlink(&self) -> bool {
48         self.dent.path_is_symlink()
49     }
50 
51     /// Returns true if and only if this entry corresponds to stdin.
52     ///
53     /// i.e., The entry has depth 0 and its file name is `-`.
is_stdin(&self) -> bool54     pub fn is_stdin(&self) -> bool {
55         self.dent.is_stdin()
56     }
57 
58     /// Return the metadata for the file that this entry points to.
metadata(&self) -> Result<Metadata, Error>59     pub fn metadata(&self) -> Result<Metadata, Error> {
60         self.dent.metadata()
61     }
62 
63     /// Return the file type for the file that this entry points to.
64     ///
65     /// This entry doesn't have a file type if it corresponds to stdin.
file_type(&self) -> Option<FileType>66     pub fn file_type(&self) -> Option<FileType> {
67         self.dent.file_type()
68     }
69 
70     /// Return the file name of this entry.
71     ///
72     /// If this entry has no file name (e.g., `/`), then the full path is
73     /// returned.
file_name(&self) -> &OsStr74     pub fn file_name(&self) -> &OsStr {
75         self.dent.file_name()
76     }
77 
78     /// Returns the depth at which this entry was created relative to the root.
depth(&self) -> usize79     pub fn depth(&self) -> usize {
80         self.dent.depth()
81     }
82 
83     /// Returns the underlying inode number if one exists.
84     ///
85     /// If this entry doesn't have an inode number, then `None` is returned.
86     #[cfg(unix)]
ino(&self) -> Option<u64>87     pub fn ino(&self) -> Option<u64> {
88         self.dent.ino()
89     }
90 
91     /// Returns an error, if one exists, associated with processing this entry.
92     ///
93     /// An example of an error is one that occurred while parsing an ignore
94     /// file. Errors related to traversing a directory tree itself are reported
95     /// as part of yielding the directory entry, and not with this method.
error(&self) -> Option<&Error>96     pub fn error(&self) -> Option<&Error> {
97         self.err.as_ref()
98     }
99 
100     /// Returns true if and only if this entry points to a directory.
is_dir(&self) -> bool101     pub(crate) fn is_dir(&self) -> bool {
102         self.dent.is_dir()
103     }
104 
new_stdin() -> DirEntry105     fn new_stdin() -> DirEntry {
106         DirEntry { dent: DirEntryInner::Stdin, err: None }
107     }
108 
new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry109     fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
110         DirEntry { dent: DirEntryInner::Walkdir(dent), err: err }
111     }
112 
new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry113     fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
114         DirEntry { dent: DirEntryInner::Raw(dent), err: err }
115     }
116 }
117 
/// DirEntryInner is the implementation of DirEntry.
///
/// It specifically represents three distinct sources of directory entries:
///
/// 1. From the walkdir crate.
/// 2. Special entries that represent things like stdin.
/// 3. From a path.
///
/// Specifically, (3) has to essentially re-create the DirEntry implementation
/// from WalkDir.
#[derive(Clone, Debug)]
enum DirEntryInner {
    /// The special stdin entry. It has no metadata and no file type, and
    /// reports the placeholder path `<stdin>`.
    Stdin,
    /// An entry produced by the walkdir crate.
    Walkdir(walkdir::DirEntry),
    /// An entry described by this module's own `DirEntryRaw`.
    Raw(DirEntryRaw),
}
134 
135 impl DirEntryInner {
path(&self) -> &Path136     fn path(&self) -> &Path {
137         use self::DirEntryInner::*;
138         match *self {
139             Stdin => Path::new("<stdin>"),
140             Walkdir(ref x) => x.path(),
141             Raw(ref x) => x.path(),
142         }
143     }
144 
into_path(self) -> PathBuf145     fn into_path(self) -> PathBuf {
146         use self::DirEntryInner::*;
147         match self {
148             Stdin => PathBuf::from("<stdin>"),
149             Walkdir(x) => x.into_path(),
150             Raw(x) => x.into_path(),
151         }
152     }
153 
path_is_symlink(&self) -> bool154     fn path_is_symlink(&self) -> bool {
155         use self::DirEntryInner::*;
156         match *self {
157             Stdin => false,
158             Walkdir(ref x) => x.path_is_symlink(),
159             Raw(ref x) => x.path_is_symlink(),
160         }
161     }
162 
is_stdin(&self) -> bool163     fn is_stdin(&self) -> bool {
164         match *self {
165             DirEntryInner::Stdin => true,
166             _ => false,
167         }
168     }
169 
metadata(&self) -> Result<Metadata, Error>170     fn metadata(&self) -> Result<Metadata, Error> {
171         use self::DirEntryInner::*;
172         match *self {
173             Stdin => {
174                 let err = Error::Io(io::Error::new(
175                     io::ErrorKind::Other,
176                     "<stdin> has no metadata",
177                 ));
178                 Err(err.with_path("<stdin>"))
179             }
180             Walkdir(ref x) => x.metadata().map_err(|err| {
181                 Error::Io(io::Error::from(err)).with_path(x.path())
182             }),
183             Raw(ref x) => x.metadata(),
184         }
185     }
186 
file_type(&self) -> Option<FileType>187     fn file_type(&self) -> Option<FileType> {
188         use self::DirEntryInner::*;
189         match *self {
190             Stdin => None,
191             Walkdir(ref x) => Some(x.file_type()),
192             Raw(ref x) => Some(x.file_type()),
193         }
194     }
195 
file_name(&self) -> &OsStr196     fn file_name(&self) -> &OsStr {
197         use self::DirEntryInner::*;
198         match *self {
199             Stdin => OsStr::new("<stdin>"),
200             Walkdir(ref x) => x.file_name(),
201             Raw(ref x) => x.file_name(),
202         }
203     }
204 
depth(&self) -> usize205     fn depth(&self) -> usize {
206         use self::DirEntryInner::*;
207         match *self {
208             Stdin => 0,
209             Walkdir(ref x) => x.depth(),
210             Raw(ref x) => x.depth(),
211         }
212     }
213 
214     #[cfg(unix)]
ino(&self) -> Option<u64>215     fn ino(&self) -> Option<u64> {
216         use self::DirEntryInner::*;
217         use walkdir::DirEntryExt;
218         match *self {
219             Stdin => None,
220             Walkdir(ref x) => Some(x.ino()),
221             Raw(ref x) => Some(x.ino()),
222         }
223     }
224 
225     /// Returns true if and only if this entry points to a directory.
is_dir(&self) -> bool226     fn is_dir(&self) -> bool {
227         self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
228     }
229 }
230 
/// DirEntryRaw is essentially copied from the walkdir crate so that we can
/// build `DirEntry`s from whole cloth in the parallel iterator.
///
/// The set of fields depends on the target platform: the inode number is
/// only stored on Unix and the metadata is only stored on Windows.
#[derive(Clone)]
struct DirEntryRaw {
    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
    /// symbolic link).
    path: PathBuf,
    /// The file type. Necessary for recursive iteration, so store it.
    ty: FileType,
    /// Is set when this entry was created from a symbolic link and the user
    /// expects the iterator to follow symbolic links.
    follow_link: bool,
    /// The depth at which this entry was generated relative to the root.
    depth: usize,
    /// The underlying inode number (Unix only).
    #[cfg(unix)]
    ino: u64,
    /// The underlying metadata (Windows only). We store this on Windows
    /// because this comes for free while reading a directory.
    #[cfg(windows)]
    metadata: fs::Metadata,
}
253 
254 impl fmt::Debug for DirEntryRaw {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result255     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
256         // Leaving out FileType because it doesn't have a debug impl
257         // in Rust 1.9. We could add it if we really wanted to by manually
258         // querying each possibly file type. Meh. ---AG
259         f.debug_struct("DirEntryRaw")
260             .field("path", &self.path)
261             .field("follow_link", &self.follow_link)
262             .field("depth", &self.depth)
263             .finish()
264     }
265 }
266 
267 impl DirEntryRaw {
path(&self) -> &Path268     fn path(&self) -> &Path {
269         &self.path
270     }
271 
into_path(self) -> PathBuf272     fn into_path(self) -> PathBuf {
273         self.path
274     }
275 
path_is_symlink(&self) -> bool276     fn path_is_symlink(&self) -> bool {
277         self.ty.is_symlink() || self.follow_link
278     }
279 
metadata(&self) -> Result<Metadata, Error>280     fn metadata(&self) -> Result<Metadata, Error> {
281         self.metadata_internal()
282     }
283 
284     #[cfg(windows)]
metadata_internal(&self) -> Result<fs::Metadata, Error>285     fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
286         if self.follow_link {
287             fs::metadata(&self.path)
288         } else {
289             Ok(self.metadata.clone())
290         }
291         .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
292     }
293 
294     #[cfg(not(windows))]
metadata_internal(&self) -> Result<fs::Metadata, Error>295     fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
296         if self.follow_link {
297             fs::metadata(&self.path)
298         } else {
299             fs::symlink_metadata(&self.path)
300         }
301         .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
302     }
303 
file_type(&self) -> FileType304     fn file_type(&self) -> FileType {
305         self.ty
306     }
307 
file_name(&self) -> &OsStr308     fn file_name(&self) -> &OsStr {
309         self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
310     }
311 
depth(&self) -> usize312     fn depth(&self) -> usize {
313         self.depth
314     }
315 
316     #[cfg(unix)]
ino(&self) -> u64317     fn ino(&self) -> u64 {
318         self.ino
319     }
320 
from_entry( depth: usize, ent: &fs::DirEntry, ) -> Result<DirEntryRaw, Error>321     fn from_entry(
322         depth: usize,
323         ent: &fs::DirEntry,
324     ) -> Result<DirEntryRaw, Error> {
325         let ty = ent.file_type().map_err(|err| {
326             let err = Error::Io(io::Error::from(err)).with_path(ent.path());
327             Error::WithDepth { depth: depth, err: Box::new(err) }
328         })?;
329         DirEntryRaw::from_entry_os(depth, ent, ty)
330     }
331 
332     #[cfg(windows)]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>333     fn from_entry_os(
334         depth: usize,
335         ent: &fs::DirEntry,
336         ty: fs::FileType,
337     ) -> Result<DirEntryRaw, Error> {
338         let md = ent.metadata().map_err(|err| {
339             let err = Error::Io(io::Error::from(err)).with_path(ent.path());
340             Error::WithDepth { depth: depth, err: Box::new(err) }
341         })?;
342         Ok(DirEntryRaw {
343             path: ent.path(),
344             ty: ty,
345             follow_link: false,
346             depth: depth,
347             metadata: md,
348         })
349     }
350 
351     #[cfg(unix)]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>352     fn from_entry_os(
353         depth: usize,
354         ent: &fs::DirEntry,
355         ty: fs::FileType,
356     ) -> Result<DirEntryRaw, Error> {
357         use std::os::unix::fs::DirEntryExt;
358 
359         Ok(DirEntryRaw {
360             path: ent.path(),
361             ty: ty,
362             follow_link: false,
363             depth: depth,
364             ino: ent.ino(),
365         })
366     }
367 
368     // Placeholder implementation to allow compiling on non-standard platforms
369     // (e.g. wasm32).
370     #[cfg(not(any(windows, unix)))]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>371     fn from_entry_os(
372         depth: usize,
373         ent: &fs::DirEntry,
374         ty: fs::FileType,
375     ) -> Result<DirEntryRaw, Error> {
376         Err(Error::Io(io::Error::new(
377             io::ErrorKind::Other,
378             "unsupported platform",
379         )))
380     }
381 
382     #[cfg(windows)]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>383     fn from_path(
384         depth: usize,
385         pb: PathBuf,
386         link: bool,
387     ) -> Result<DirEntryRaw, Error> {
388         let md =
389             fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
390         Ok(DirEntryRaw {
391             path: pb,
392             ty: md.file_type(),
393             follow_link: link,
394             depth: depth,
395             metadata: md,
396         })
397     }
398 
399     #[cfg(unix)]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>400     fn from_path(
401         depth: usize,
402         pb: PathBuf,
403         link: bool,
404     ) -> Result<DirEntryRaw, Error> {
405         use std::os::unix::fs::MetadataExt;
406 
407         let md =
408             fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
409         Ok(DirEntryRaw {
410             path: pb,
411             ty: md.file_type(),
412             follow_link: link,
413             depth: depth,
414             ino: md.ino(),
415         })
416     }
417 
418     // Placeholder implementation to allow compiling on non-standard platforms
419     // (e.g. wasm32).
420     #[cfg(not(any(windows, unix)))]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>421     fn from_path(
422         depth: usize,
423         pb: PathBuf,
424         link: bool,
425     ) -> Result<DirEntryRaw, Error> {
426         Err(Error::Io(io::Error::new(
427             io::ErrorKind::Other,
428             "unsupported platform",
429         )))
430     }
431 }
432 
/// WalkBuilder builds a recursive directory iterator.
///
/// The builder supports a large number of configurable options. This includes
/// specific glob overrides, file type matching, toggling whether hidden
/// files are ignored or not, and of course, support for respecting gitignore
/// files.
///
/// By default, all ignore files found are respected. This includes `.ignore`,
/// `.gitignore`, `.git/info/exclude` and even your global gitignore
/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
///
/// Some standard recursive directory options are also supported, such as
/// limiting the recursive depth or whether to follow symbolic links (disabled
/// by default).
///
/// # Ignore rules
///
/// There are many rules that influence whether a particular file or directory
/// is skipped by this iterator. Those rules are documented here. Note that
/// the rules assume a default configuration.
///
/// * First, glob overrides are checked. If a path matches a glob override,
/// then matching stops. The path is then only skipped if the glob that matched
/// the path is an ignore glob. (An override glob is a whitelist glob unless it
/// starts with a `!`, in which case it is an ignore glob.)
/// * Second, ignore files are checked. Ignore files currently only come from
/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
/// global gitignore file), plain `.ignore` files, which have the same format
/// as gitignore files, or explicitly added ignore files. The precedence order
/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
/// finally explicitly added ignore files. Note that precedence between
/// different types of ignore files is not impacted by the directory hierarchy;
/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
/// level, more nested ignore files have a higher precedence than less nested
/// ignore files.
/// * Third, if the previous step yields an ignore match, then all matching
/// is stopped and the path is skipped. If it yields a whitelist match, then
/// matching continues. A whitelist match can be overridden by a later matcher.
/// * Fourth, unless the path is a directory, the file type matcher is run on
/// the path. As above, if it yields an ignore match, then all matching is
/// stopped and the path is skipped. If it yields a whitelist match, then
/// matching continues.
/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
/// path is skipped.
/// * Sixth, unless the path is a directory, the size of the file is compared
/// against the max filesize limit. If it exceeds the limit, it is skipped.
/// * Seventh, if the path has made it this far then it is yielded in the
/// iterator.
#[derive(Clone)]
pub struct WalkBuilder {
    /// The root paths to traverse: seeded by `new`, extended by `add`.
    paths: Vec<PathBuf>,
    /// Builder for the ignore matcher. Most of the filter toggles on this
    /// type are forwarded to it.
    ig_builder: IgnoreBuilder,
    /// The maximum depth to recurse. `None` imposes no depth restriction.
    max_depth: Option<usize>,
    /// The optional max filesize limit. Defaults to `None`.
    max_filesize: Option<u64>,
    /// Whether to follow symbolic links. Defaults to `false`.
    follow_links: bool,
    /// Passed to `WalkDir::same_file_system` by `build` and copied into the
    /// parallel walker by `build_parallel`. Defaults to `false`.
    same_file_system: bool,
    /// An optional comparator for ordering entries. `build` installs it on
    /// each `WalkDir`; `build_parallel` does not use it.
    sorter: Option<Sorter>,
    /// The number of threads for `build_parallel`. `0` chooses the number
    /// automatically.
    threads: usize,
    /// An optional file handle that is passed on to the built walkers.
    /// NOTE(review): how the walkers use it is not visible in this part of
    /// the file.
    skip: Option<Arc<Handle>>,
    /// An optional predicate over entries that is passed on to the built
    /// walkers.
    filter: Option<Filter>,
}
494 
/// A user-supplied comparison function for ordering directory entries.
///
/// The function is stored behind an `Arc`, which is what allows this enum
/// (and in turn `WalkBuilder`) to derive `Clone`.
#[derive(Clone)]
enum Sorter {
    /// Compares two entries by their file names only.
    ByName(
        Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>,
    ),
    /// Compares two entries by their paths.
    ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
}
502 
/// A shareable, user-supplied predicate over directory entries.
///
/// NOTE(review): how the walkers interpret the returned boolean is decided
/// outside this part of the file.
#[derive(Clone)]
struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
505 
506 impl fmt::Debug for WalkBuilder {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result507     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
508         f.debug_struct("WalkBuilder")
509             .field("paths", &self.paths)
510             .field("ig_builder", &self.ig_builder)
511             .field("max_depth", &self.max_depth)
512             .field("max_filesize", &self.max_filesize)
513             .field("follow_links", &self.follow_links)
514             .field("threads", &self.threads)
515             .field("skip", &self.skip)
516             .finish()
517     }
518 }
519 
520 impl WalkBuilder {
521     /// Create a new builder for a recursive directory iterator for the
522     /// directory given.
523     ///
524     /// Note that if you want to traverse multiple different directories, it
525     /// is better to call `add` on this builder than to create multiple
526     /// `Walk` values.
new<P: AsRef<Path>>(path: P) -> WalkBuilder527     pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
528         WalkBuilder {
529             paths: vec![path.as_ref().to_path_buf()],
530             ig_builder: IgnoreBuilder::new(),
531             max_depth: None,
532             max_filesize: None,
533             follow_links: false,
534             same_file_system: false,
535             sorter: None,
536             threads: 0,
537             skip: None,
538             filter: None,
539         }
540     }
541 
542     /// Build a new `Walk` iterator.
build(&self) -> Walk543     pub fn build(&self) -> Walk {
544         let follow_links = self.follow_links;
545         let max_depth = self.max_depth;
546         let sorter = self.sorter.clone();
547         let its = self
548             .paths
549             .iter()
550             .map(move |p| {
551                 if p == Path::new("-") {
552                     (p.to_path_buf(), None)
553                 } else {
554                     let mut wd = WalkDir::new(p);
555                     wd = wd.follow_links(follow_links || p.is_file());
556                     wd = wd.same_file_system(self.same_file_system);
557                     if let Some(max_depth) = max_depth {
558                         wd = wd.max_depth(max_depth);
559                     }
560                     if let Some(ref sorter) = sorter {
561                         match sorter.clone() {
562                             Sorter::ByName(cmp) => {
563                                 wd = wd.sort_by(move |a, b| {
564                                     cmp(a.file_name(), b.file_name())
565                                 });
566                             }
567                             Sorter::ByPath(cmp) => {
568                                 wd = wd.sort_by(move |a, b| {
569                                     cmp(a.path(), b.path())
570                                 });
571                             }
572                         }
573                     }
574                     (p.to_path_buf(), Some(WalkEventIter::from(wd)))
575                 }
576             })
577             .collect::<Vec<_>>()
578             .into_iter();
579         let ig_root = self.ig_builder.build();
580         Walk {
581             its: its,
582             it: None,
583             ig_root: ig_root.clone(),
584             ig: ig_root.clone(),
585             max_filesize: self.max_filesize,
586             skip: self.skip.clone(),
587             filter: self.filter.clone(),
588         }
589     }
590 
    /// Build a new `WalkParallel` iterator.
    ///
    /// Note that this *doesn't* return something that implements `Iterator`.
    /// Instead, the returned value must be run with a closure. e.g.,
    /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
    ///
    /// The builder's settings are copied into the returned value, so the
    /// builder can be reused afterwards. Any configured sorter is not passed
    /// along.
    pub fn build_parallel(&self) -> WalkParallel {
        WalkParallel {
            paths: self.paths.clone().into_iter(),
            ig_root: self.ig_builder.build(),
            max_depth: self.max_depth,
            max_filesize: self.max_filesize,
            follow_links: self.follow_links,
            same_file_system: self.same_file_system,
            threads: self.threads,
            skip: self.skip.clone(),
            filter: self.filter.clone(),
        }
    }
609 
    /// Add a file path to the iterator.
    ///
    /// Each additional file path added is traversed recursively. This should
    /// be preferred over building multiple `Walk` iterators since this
    /// enables reusing resources across iteration.
    ///
    /// The path is copied into the builder, which is then returned so that
    /// calls can be chained.
    pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
        self.paths.push(path.as_ref().to_path_buf());
        self
    }
619 
    /// The maximum depth to recurse.
    ///
    /// The default, `None`, imposes no depth restriction.
    ///
    /// Returns the builder so that calls can be chained.
    pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
        self.max_depth = depth;
        self
    }
627 
    /// Whether to follow symbolic links or not.
    ///
    /// This is disabled by default.
    ///
    /// Returns the builder so that calls can be chained.
    pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
        self.follow_links = yes;
        self
    }
633 
    /// Sets the max filesize limit. Files exceeding the limit are skipped;
    /// directories are never compared against it.
    ///
    /// The default is `None`.
    ///
    /// Returns the builder so that calls can be chained.
    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
        self.max_filesize = filesize;
        self
    }
639 
    /// The number of threads to use for traversal.
    ///
    /// Note that this only has an effect when using `build_parallel`.
    ///
    /// The default setting is `0`, which chooses the number of threads
    /// automatically using heuristics.
    ///
    /// Returns the builder so that calls can be chained.
    pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
        self.threads = n;
        self
    }
650 
651     /// Add a global ignore file to the matcher.
652     ///
653     /// This has lower precedence than all other sources of ignore rules.
654     ///
655     /// If there was a problem adding the ignore file, then an error is
656     /// returned. Note that the error may indicate *partial* failure. For
657     /// example, if an ignore file contains an invalid glob, all other globs
658     /// are still applied.
add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error>659     pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
660         let mut builder = GitignoreBuilder::new("");
661         let mut errs = PartialErrorBuilder::default();
662         errs.maybe_push(builder.add(path));
663         match builder.build() {
664             Ok(gi) => {
665                 self.ig_builder.add_ignore(gi);
666             }
667             Err(err) => {
668                 errs.push(err);
669             }
670         }
671         errs.into_error_option()
672     }
673 
    /// Add a custom ignore file name.
    ///
    /// These ignore files have higher precedence than all other ignore files.
    ///
    /// When specifying multiple names, earlier names have lower precedence than
    /// later names.
    ///
    /// The name is forwarded to the underlying `IgnoreBuilder`. Returns the
    /// builder so that calls can be chained.
    pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
        &mut self,
        file_name: S,
    ) -> &mut WalkBuilder {
        self.ig_builder.add_custom_ignore_filename(file_name);
        self
    }
687 
    /// Add an override matcher.
    ///
    /// By default, no override matcher is used.
    ///
    /// This overrides any previous setting.
    ///
    /// The matcher is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
        self.ig_builder.overrides(overrides);
        self
    }
697 
    /// Add a file type matcher.
    ///
    /// By default, no file type matcher is used.
    ///
    /// This overrides any previous setting.
    ///
    /// The matcher is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
        self.ig_builder.types(types);
        self
    }
707 
708     /// Enables all the standard ignore filters.
709     ///
710     /// This toggles, as a group, all the filters that are enabled by default:
711     ///
712     /// - [hidden()](#method.hidden)
713     /// - [parents()](#method.parents)
714     /// - [ignore()](#method.ignore)
715     /// - [git_ignore()](#method.git_ignore)
716     /// - [git_global()](#method.git_global)
717     /// - [git_exclude()](#method.git_exclude)
718     ///
719     /// They may still be toggled individually after calling this function.
720     ///
721     /// This is (by definition) enabled by default.
standard_filters(&mut self, yes: bool) -> &mut WalkBuilder722     pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
723         self.hidden(yes)
724             .parents(yes)
725             .ignore(yes)
726             .git_ignore(yes)
727             .git_global(yes)
728             .git_exclude(yes)
729     }
730 
    /// Enables ignoring hidden files.
    ///
    /// This is enabled by default.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.hidden(yes);
        self
    }
738 
    /// Enables reading ignore files from parent directories.
    ///
    /// If this is enabled, then .gitignore files in parent directories of each
    /// file path given are respected. Otherwise, they are ignored.
    ///
    /// This is enabled by default.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.parents(yes);
        self
    }
749 
    /// Enables reading `.ignore` files.
    ///
    /// `.ignore` files have the same semantics as `gitignore` files and are
    /// supported by search tools such as ripgrep and The Silver Searcher.
    ///
    /// This is enabled by default.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.ignore(yes);
        self
    }
760 
    /// Enables reading a global gitignore file, whose path is specified in
    /// git's `core.excludesFile` config option.
    ///
    /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
    /// does not exist or does not specify `core.excludesFile`, then
    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
    ///
    /// This is enabled by default.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.git_global(yes);
        self
    }
774 
    /// Enables reading `.gitignore` files.
    ///
    /// `.gitignore` files have match semantics as described in the `gitignore`
    /// man page.
    ///
    /// This is enabled by default.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.git_ignore(yes);
        self
    }
785 
    /// Enables reading `.git/info/exclude` files.
    ///
    /// `.git/info/exclude` files have match semantics as described in the
    /// `gitignore` man page.
    ///
    /// This is enabled by default.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.git_exclude(yes);
        self
    }
796 
    /// Whether a git repository is required to apply git-related ignore
    /// rules (global rules, .gitignore and local exclude rules).
    ///
    /// When disabled, git-related ignore rules are applied even when searching
    /// outside a git repository.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`, which
    /// also determines its default. Returns the builder so that calls can be
    /// chained.
    pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.require_git(yes);
        self
    }
806 
    /// Process ignore files case insensitively.
    ///
    /// This is disabled by default.
    ///
    /// The setting is forwarded to the underlying `IgnoreBuilder`. Returns
    /// the builder so that calls can be chained.
    pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
        self.ig_builder.ignore_case_insensitive(yes);
        self
    }
814 
815     /// Set a function for sorting directory entries by their path.
816     ///
817     /// If a compare function is set, the resulting iterator will return all
818     /// paths in sorted order. The compare function will be called to compare
819     /// entries from the same directory.
820     ///
821     /// This is like `sort_by_file_name`, except the comparator accepts
822     /// a `&Path` instead of the base file name, which permits it to sort by
823     /// more criteria.
824     ///
825     /// This method will override any previous sorter set by this method or
826     /// by `sort_by_file_name`.
827     ///
828     /// Note that this is not used in the parallel iterator.
sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,829     pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
830     where
831         F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
832     {
833         self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
834         self
835     }
836 
837     /// Set a function for sorting directory entries by file name.
838     ///
839     /// If a compare function is set, the resulting iterator will return all
840     /// paths in sorted order. The compare function will be called to compare
841     /// names from entries from the same directory using only the name of the
842     /// entry.
843     ///
844     /// This method will override any previous sorter set by this method or
845     /// by `sort_by_file_path`.
846     ///
847     /// Note that this is not used in the parallel iterator.
sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,848     pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
849     where
850         F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
851     {
852         self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
853         self
854     }
855 
    /// Do not cross file system boundaries.
    ///
    /// When this option is enabled, directory traversal will not descend into
    /// directories that are on a different file system from the root path.
    ///
    /// Currently, this option is only supported on Unix and Windows. If this
    /// option is used on an unsupported platform, then directory traversal
    /// will immediately return an error and will not yield any entries.
    ///
    /// This method only records the flag on the builder; the builder itself
    /// is returned so that calls can be chained.
    pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
        self.same_file_system = yes;
        self
    }
868 
869     /// Do not yield directory entries that are believed to correspond to
870     /// stdout.
871     ///
872     /// This is useful when a command is invoked via shell redirection to a
873     /// file that is also being read. For example, `grep -r foo ./ > results`
874     /// might end up trying to search `results` even though it is also writing
875     /// to it, which could cause an unbounded feedback loop. Setting this
876     /// option prevents this from happening by skipping over the `results`
877     /// file.
878     ///
879     /// This is disabled by default.
skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder880     pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
881         if yes {
882             self.skip = stdout_handle().map(Arc::new);
883         } else {
884             self.skip = None;
885         }
886         self
887     }
888 
889     /// Yields only entries which satisfy the given predicate and skips
890     /// descending into directories that do not satisfy the given predicate.
891     ///
892     /// The predicate is applied to all entries. If the predicate is
893     /// true, iteration carries on as normal. If the predicate is false, the
894     /// entry is ignored and if it is a directory, it is not descended into.
895     ///
896     /// Note that the errors for reading entries that may not satisfy the
897     /// predicate will still be yielded.
filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder where P: Fn(&DirEntry) -> bool + Send + Sync + 'static,898     pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
899     where
900         P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
901     {
902         self.filter = Some(Filter(Arc::new(filter)));
903         self
904     }
905 }
906 
/// Walk is a recursive directory iterator over file paths in one or more
/// directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence is explained in the documentation for `WalkBuilder`.
pub struct Walk {
    /// The remaining roots to traverse, each paired with its event iterator.
    /// A root whose iterator is `None` is yielded as the stdin entry.
    its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
    /// The event iterator for the root currently being traversed, if any.
    it: Option<WalkEventIter>,
    /// The ignore matcher that each root's matcher is derived from.
    ig_root: Ignore,
    /// The ignore matcher for the directory currently being traversed. It is
    /// replaced by a child matcher when a directory is entered and by its
    /// parent when a directory is exited.
    ig: Ignore,
    /// When set, the size limit that non-directory entries are checked
    /// against.
    max_filesize: Option<u64>,
    /// When set, entries found to be equal to this handle are skipped.
    skip: Option<Arc<Handle>>,
    /// When set, entries for which this predicate returns false are skipped.
    filter: Option<Filter>,
}
922 
923 impl Walk {
924     /// Creates a new recursive directory iterator for the file path given.
925     ///
926     /// Note that this uses default settings, which include respecting
927     /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
928     /// instead.
new<P: AsRef<Path>>(path: P) -> Walk929     pub fn new<P: AsRef<Path>>(path: P) -> Walk {
930         WalkBuilder::new(path).build()
931     }
932 
skip_entry(&self, ent: &DirEntry) -> Result<bool, Error>933     fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
934         if ent.depth() == 0 {
935             return Ok(false);
936         }
937 
938         if let Some(ref stdout) = self.skip {
939             if path_equals(ent, stdout)? {
940                 return Ok(true);
941             }
942         }
943         if should_skip_entry(&self.ig, ent) {
944             return Ok(true);
945         }
946         if self.max_filesize.is_some() && !ent.is_dir() {
947             return Ok(skip_filesize(
948                 self.max_filesize.unwrap(),
949                 ent.path(),
950                 &ent.metadata().ok(),
951             ));
952         }
953         if let Some(Filter(filter)) = &self.filter {
954             if !filter(ent) {
955                 return Ok(true);
956             }
957         }
958         Ok(false)
959     }
960 }
961 
/// Yields the entries of every configured root in turn, applying the skip
/// rules of `Walk::skip_entry` and keeping the ignore matcher in `self.ig`
/// in step with the directory currently being traversed.
impl Iterator for Walk {
    type Item = Result<DirEntry, Error>;

    #[inline(always)]
    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
        loop {
            // Pull the next event from the current root. If there is no
            // current root, or it is exhausted, move on to the next root.
            let ev = match self.it.as_mut().and_then(|it| it.next()) {
                Some(ev) => ev,
                None => {
                    match self.its.next() {
                        // No roots left: the walk is finished.
                        None => return None,
                        // A root without an event iterator is reported as
                        // the stdin entry.
                        Some((_, None)) => {
                            return Some(Ok(DirEntry::new_stdin()));
                        }
                        Some((path, Some(it))) => {
                            self.it = Some(it);
                            // Directory roots get a matcher that includes
                            // the rules of their parent directories; any
                            // other root starts from the root matcher.
                            if path.is_dir() {
                                let (ig, err) = self.ig_root.add_parents(path);
                                self.ig = ig;
                                if let Some(err) = err {
                                    return Some(Err(err));
                                }
                            } else {
                                self.ig = self.ig_root.clone();
                            }
                        }
                    }
                    continue;
                }
            };
            match ev {
                Err(err) => {
                    return Some(Err(Error::from_walkdir(err)));
                }
                Ok(WalkEvent::Exit) => {
                    // Leaving a directory: restore the parent's matcher.
                    // Every `Dir` event below installs a child matcher, so
                    // a parent is expected to exist here.
                    self.ig = self.ig.parent().unwrap();
                }
                Ok(WalkEvent::Dir(ent)) => {
                    let mut ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        self.it.as_mut().unwrap().it.skip_current_dir();
                        // Still need to push this on the stack because
                        // we'll get a WalkEvent::Exit event for this dir.
                        // We don't care if it errors though.
                        let (igtmp, _) = self.ig.add_child(ent.path());
                        self.ig = igtmp;
                        continue;
                    }
                    // Entering the directory: install its matcher and attach
                    // any error from building it to the yielded entry.
                    let (igtmp, err) = self.ig.add_child(ent.path());
                    self.ig = igtmp;
                    ent.err = err;
                    return Some(Ok(ent));
                }
                Ok(WalkEvent::File(ent)) => {
                    let ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        continue;
                    }
                    return Some(Ok(ent));
                }
            }
        }
    }
}
1034 
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
    /// Tracks how deep the traversal currently is. It is compared against
    /// the depth of each incoming entry to decide whether an "exit" event
    /// must be emitted first.
    depth: usize,
    /// The underlying walkdir iterator.
    it: walkdir::IntoIter,
    /// An item already pulled from `it` that was held back because an "exit"
    /// event was emitted in its place. It is consumed before `it` is polled
    /// again.
    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
}
1044 
/// An event emitted by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
    /// A directory was encountered.
    Dir(walkdir::DirEntry),
    /// An entry that is not a directory was encountered.
    File(walkdir::DirEntry),
    /// The contents of a directory have been fully enumerated.
    Exit,
}
1051 
1052 impl From<WalkDir> for WalkEventIter {
from(it: WalkDir) -> WalkEventIter1053     fn from(it: WalkDir) -> WalkEventIter {
1054         WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1055     }
1056 }
1057 
1058 impl Iterator for WalkEventIter {
1059     type Item = walkdir::Result<WalkEvent>;
1060 
1061     #[inline(always)]
next(&mut self) -> Option<walkdir::Result<WalkEvent>>1062     fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1063         let dent = self.next.take().or_else(|| self.it.next());
1064         let depth = match dent {
1065             None => 0,
1066             Some(Ok(ref dent)) => dent.depth(),
1067             Some(Err(ref err)) => err.depth(),
1068         };
1069         if depth < self.depth {
1070             self.depth -= 1;
1071             self.next = dent;
1072             return Some(Ok(WalkEvent::Exit));
1073         }
1074         self.depth = depth;
1075         match dent {
1076             None => None,
1077             Some(Err(err)) => Some(Err(err)),
1078             Some(Ok(dent)) => {
1079                 if walkdir_is_dir(&dent) {
1080                     self.depth += 1;
1081                     Some(Ok(WalkEvent::Dir(dent)))
1082                 } else {
1083                     Some(Ok(WalkEvent::File(dent)))
1084                 }
1085             }
1086         }
1087     }
1088 }
1089 
/// WalkState tells the parallel recursive directory iterator how to proceed
/// after an entry has been visited: keep walking as normal, skip descending
/// into a particular directory or quit the walk entirely.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WalkState {
    /// Continue walking as normal.
    Continue,
    /// If the directory entry given is a directory, don't descend into it.
    /// In all other cases, this has no effect.
    Skip,
    /// Quit the entire iterator as soon as possible.
    ///
    /// Note that this is an inherently asynchronous action. It is possible
    /// for more entries to be yielded even after instructing the iterator
    /// to quit.
    Quit,
}

impl WalkState {
    /// Returns true if and only if this state is `Continue`.
    fn is_continue(&self) -> bool {
        match *self {
            WalkState::Continue => true,
            WalkState::Skip | WalkState::Quit => false,
        }
    }

    /// Returns true if and only if this state is `Quit`.
    fn is_quit(&self) -> bool {
        match *self {
            WalkState::Quit => true,
            WalkState::Continue | WalkState::Skip => false,
        }
    }
}
1117 
/// A builder for constructing a visitor when using
/// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder
/// will be called for each thread started by `WalkParallel`. The visitor
/// returned from each builder is then called for every directory entry.
///
/// The lifetime `'s` bounds the visitors that the builder produces.
pub trait ParallelVisitorBuilder<'s> {
    /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
    fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
}
1126 
1127 impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1128     for &'a mut P
1129 {
build(&mut self) -> Box<dyn ParallelVisitor + 's>1130     fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1131         (**self).build()
1132     }
1133 }
1134 
/// Receives files and directories for the current thread.
///
/// Setup for the traversal can be implemented as part of
/// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build).
/// Teardown when traversal finishes can be implemented by implementing the
/// `Drop` trait on your traversal type.
pub trait ParallelVisitor: Send {
    /// Receives files and directories for the current thread. This is called
    /// once for every directory entry visited by traversal.
    ///
    /// The `WalkState` returned tells the walker how to proceed.
    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
}
1146 
/// Adapts a closure that produces visitor closures into a
/// `ParallelVisitorBuilder`. This is what `WalkParallel::run` hands to
/// `WalkParallel::visit`.
struct FnBuilder<F> {
    /// The closure invoked once per `build` call to make a new visitor.
    builder: F,
}
1150 
1151 impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1152     for FnBuilder<F>
1153 {
build(&mut self) -> Box<dyn ParallelVisitor + 's>1154     fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1155         let visitor = (self.builder)();
1156         Box::new(FnVisitorImp { visitor })
1157     }
1158 }
1159 
/// A boxed closure that is called with each directory entry (or error) and
/// returns the `WalkState` that tells the walker how to proceed.
type FnVisitor<'s> =
    Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1162 
/// Wraps an `FnVisitor` closure so that it can be used as a
/// `ParallelVisitor`.
struct FnVisitorImp<'s> {
    /// The closure that every visited entry is forwarded to.
    visitor: FnVisitor<'s>,
}
1166 
1167 impl<'s> ParallelVisitor for FnVisitorImp<'s> {
visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState1168     fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1169         (self.visitor)(entry)
1170     }
1171 }
1172 
/// WalkParallel is a parallel recursive directory iterator over file paths
/// in one or more directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence is explained in the documentation for `WalkBuilder`.
///
/// Unlike `Walk`, this uses multiple threads for traversing a directory.
pub struct WalkParallel {
    /// The root paths to traverse. The path `-` is treated as stdin.
    paths: vec::IntoIter<PathBuf>,
    /// The ignore matcher that every root work item starts from.
    ig_root: Ignore,
    /// When set, the size limit handed to each worker.
    max_filesize: Option<u64>,
    /// When set, the maximum depth handed to each worker.
    max_depth: Option<usize>,
    /// Whether workers follow symbolic links.
    follow_links: bool,
    /// Whether the device number of each root is recorded so that traversal
    /// can be confined to the root's file system.
    same_file_system: bool,
    /// The number of worker threads to start. A value of `0` selects a
    /// default (see `threads`).
    threads: usize,
    /// When set, the handle handed to each worker so that entries equal to
    /// it can be skipped.
    skip: Option<Arc<Handle>>,
    /// When set, the predicate handed to each worker for filtering entries.
    filter: Option<Filter>,
}
1192 
1193 impl WalkParallel {
1194     /// Execute the parallel recursive directory iterator. `mkf` is called
1195     /// for each thread used for iteration. The function produced by `mkf`
1196     /// is then in turn called for each visited file path.
run<'s, F>(self, mkf: F) where F: FnMut() -> FnVisitor<'s>,1197     pub fn run<'s, F>(self, mkf: F)
1198     where
1199         F: FnMut() -> FnVisitor<'s>,
1200     {
1201         self.visit(&mut FnBuilder { builder: mkf })
1202     }
1203 
1204     /// Execute the parallel recursive directory iterator using a custom
1205     /// visitor.
1206     ///
1207     /// The builder given is used to construct a visitor for every thread
1208     /// used by this traversal. The visitor returned from each builder is then
1209     /// called for every directory entry seen by that thread.
1210     ///
1211     /// Typically, creating a custom visitor is useful if you need to perform
1212     /// some kind of cleanup once traversal is finished. This can be achieved
1213     /// by implementing `Drop` for your builder (or for your visitor, if you
1214     /// want to execute cleanup for every thread that is launched).
1215     ///
1216     /// For example, each visitor might build up a data structure of results
1217     /// corresponding to the directory entries seen for each thread. Since each
1218     /// visitor runs on only one thread, this build-up can be done without
1219     /// synchronization. Then, once traversal is complete, all of the results
1220     /// can be merged together into a single data structure.
visit(mut self, builder: &mut dyn ParallelVisitorBuilder)1221     pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder) {
1222         let threads = self.threads();
1223         let stack = Arc::new(Mutex::new(vec![]));
1224         {
1225             let mut stack = stack.lock().unwrap();
1226             let mut visitor = builder.build();
1227             let mut paths = Vec::new().into_iter();
1228             std::mem::swap(&mut paths, &mut self.paths);
1229             // Send the initial set of root paths to the pool of workers. Note
1230             // that we only send directories. For files, we send to them the
1231             // callback directly.
1232             for path in paths {
1233                 let (dent, root_device) = if path == Path::new("-") {
1234                     (DirEntry::new_stdin(), None)
1235                 } else {
1236                     let root_device = if !self.same_file_system {
1237                         None
1238                     } else {
1239                         match device_num(&path) {
1240                             Ok(root_device) => Some(root_device),
1241                             Err(err) => {
1242                                 let err = Error::Io(err).with_path(path);
1243                                 if visitor.visit(Err(err)).is_quit() {
1244                                     return;
1245                                 }
1246                                 continue;
1247                             }
1248                         }
1249                     };
1250                     match DirEntryRaw::from_path(0, path, false) {
1251                         Ok(dent) => {
1252                             (DirEntry::new_raw(dent, None), root_device)
1253                         }
1254                         Err(err) => {
1255                             if visitor.visit(Err(err)).is_quit() {
1256                                 return;
1257                             }
1258                             continue;
1259                         }
1260                     }
1261                 };
1262                 stack.push(Message::Work(Work {
1263                     dent: dent,
1264                     ignore: self.ig_root.clone(),
1265                     root_device: root_device,
1266                 }));
1267             }
1268             // ... but there's no need to start workers if we don't need them.
1269             if stack.is_empty() {
1270                 return;
1271             }
1272         }
1273         // Create the workers and then wait for them to finish.
1274         let quit_now = Arc::new(AtomicBool::new(false));
1275         let num_pending =
1276             Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
1277         crossbeam_utils::thread::scope(|s| {
1278             let mut handles = vec![];
1279             for _ in 0..threads {
1280                 let worker = Worker {
1281                     visitor: builder.build(),
1282                     stack: stack.clone(),
1283                     quit_now: quit_now.clone(),
1284                     num_pending: num_pending.clone(),
1285                     max_depth: self.max_depth,
1286                     max_filesize: self.max_filesize,
1287                     follow_links: self.follow_links,
1288                     skip: self.skip.clone(),
1289                     filter: self.filter.clone(),
1290                 };
1291                 handles.push(s.spawn(|_| worker.run()));
1292             }
1293             for handle in handles {
1294                 handle.join().unwrap();
1295             }
1296         })
1297         .unwrap(); // Pass along panics from threads
1298     }
1299 
threads(&self) -> usize1300     fn threads(&self) -> usize {
1301         if self.threads == 0 {
1302             2
1303         } else {
1304             self.threads
1305         }
1306     }
1307 }
1308 
/// Message is the set of instructions that a worker knows how to process.
///
/// Messages are exchanged between workers through the shared work stack.
enum Message {
    /// A work item corresponds to a directory that should be descended into.
    /// Work items for entries that should be skipped or ignored should not
    /// be produced.
    Work(Work),
    /// This instruction indicates that the worker should quit.
    Quit,
}
1318 
/// A unit of work for each worker to process.
///
/// Each unit of work corresponds to a directory that should be descended
/// into. Entries that are not directories are carried as work items too, but
/// are simply handed to the visitor when processed.
struct Work {
    /// The directory entry.
    dent: DirEntry,
    /// Any ignore matchers that have been built for this directory's parents.
    ignore: Ignore,
    /// The root device number. When present, only files with the same device
    /// number should be considered.
    root_device: Option<u64>,
}
1332 
1333 impl Work {
1334     /// Returns true if and only if this work item is a directory.
is_dir(&self) -> bool1335     fn is_dir(&self) -> bool {
1336         self.dent.is_dir()
1337     }
1338 
1339     /// Returns true if and only if this work item is a symlink.
is_symlink(&self) -> bool1340     fn is_symlink(&self) -> bool {
1341         self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1342     }
1343 
1344     /// Adds ignore rules for parent directories.
1345     ///
1346     /// Note that this only applies to entries at depth 0. On all other
1347     /// entries, this is a no-op.
add_parents(&mut self) -> Option<Error>1348     fn add_parents(&mut self) -> Option<Error> {
1349         if self.dent.depth() > 0 {
1350             return None;
1351         }
1352         // At depth 0, the path of this entry is a root path, so we can
1353         // use it directly to add parent ignore rules.
1354         let (ig, err) = self.ignore.add_parents(self.dent.path());
1355         self.ignore = ig;
1356         err
1357     }
1358 
1359     /// Reads the directory contents of this work item and adds ignore
1360     /// rules for this directory.
1361     ///
1362     /// If there was a problem with reading the directory contents, then
1363     /// an error is returned. If there was a problem reading the ignore
1364     /// rules for this directory, then the error is attached to this
1365     /// work item's directory entry.
read_dir(&mut self) -> Result<fs::ReadDir, Error>1366     fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1367         let readdir = match fs::read_dir(self.dent.path()) {
1368             Ok(readdir) => readdir,
1369             Err(err) => {
1370                 let err = Error::from(err)
1371                     .with_path(self.dent.path())
1372                     .with_depth(self.dent.depth());
1373                 return Err(err);
1374             }
1375         };
1376         let (ig, err) = self.ignore.add_child(self.dent.path());
1377         self.ignore = ig;
1378         self.dent.err = err;
1379         Ok(readdir)
1380     }
1381 }
1382 
/// A worker is responsible for descending into directories, updating the
/// ignore matchers, producing new work and invoking the caller's callback.
///
/// Note that a worker is *both* a producer and a consumer.
struct Worker<'s> {
    /// The caller's callback.
    visitor: Box<dyn ParallelVisitor + 's>,
    /// A stack of work to do.
    ///
    /// We use a stack instead of a channel because a stack lets us visit
    /// directories in depth first order. This can substantially reduce peak
    /// memory usage by keeping both the number of file paths and gitignore
    /// matchers in memory lower.
    stack: Arc<Mutex<Vec<Message>>>,
    /// Whether all workers should terminate at the next opportunity. Note
    /// that we need this because we don't want other `Work` to be done after
    /// we quit. We wouldn't need this if we had a priority channel.
    quit_now: Arc<AtomicBool>,
    /// The number of outstanding work items.
    num_pending: Arc<AtomicUsize>,
    /// The maximum depth of directories to descend. A value of `0` means no
    /// descension at all.
    max_depth: Option<usize>,
    /// The maximum size a searched file can be (in bytes). If a file exceeds
    /// this size it will be skipped.
    max_filesize: Option<u64>,
    /// Whether to follow symbolic links or not. When this is enabled, loop
    /// detection is performed.
    follow_links: bool,
    /// A file handle to skip, currently is either `None` or stdout, if it's
    /// a file and it has been requested to skip files identical to stdout.
    skip: Option<Arc<Handle>>,
    /// A predicate applied to dir entries. If it returns false, the entry is
    /// skipped; since no work is produced for a skipped entry, a skipped
    /// directory is not descended into either.
    filter: Option<Filter>,
}
1419 
1420 impl<'s> Worker<'s> {
1421     /// Runs this worker until there is no more work left to do.
1422     ///
1423     /// The worker will call the caller's callback for all entries that aren't
1424     /// skipped by the ignore matcher.
run(mut self)1425     fn run(mut self) {
1426         while let Some(work) = self.get_work() {
1427             if let WalkState::Quit = self.run_one(work) {
1428                 self.quit_now();
1429             }
1430             self.work_done();
1431         }
1432     }
1433 
    /// Processes a single work item.
    ///
    /// Entries that are symlinks or not directories are handed straight to
    /// the visitor. For a directory, the visitor first receives the entry
    /// itself and, if it answers `Continue`, each child of the directory is
    /// passed to `generate_work`.
    ///
    /// Returns the state that ended processing: the visitor's answer when it
    /// was not `Continue` (or `Quit` after reporting an error), `Skip` when
    /// the directory was deliberately not descended into, and `Continue`
    /// otherwise.
    fn run_one(&mut self, mut work: Work) -> WalkState {
        // If the work is not a directory, then we can just execute the
        // caller's callback immediately and move on.
        if work.is_symlink() || !work.is_dir() {
            return self.visitor.visit(Ok(work.dent));
        }
        // Report (but otherwise tolerate) errors from loading the ignore
        // rules of parent directories.
        if let Some(err) = work.add_parents() {
            let state = self.visitor.visit(Err(err));
            if state.is_quit() {
                return state;
            }
        }

        // When a root device was recorded, only descend if this directory
        // is on the same file system. A failed check is reported to the
        // visitor and treated as "do not descend".
        let descend = if let Some(root_device) = work.root_device {
            match is_same_file_system(root_device, work.dent.path()) {
                Ok(true) => true,
                Ok(false) => false,
                Err(err) => {
                    let state = self.visitor.visit(Err(err));
                    if state.is_quit() {
                        return state;
                    }
                    false
                }
            }
        } else {
            true
        };

        // Try to read the directory first before we transfer ownership
        // to the provided closure. Do not unwrap it immediately, though,
        // as we may receive an `Err` value e.g. in the case when we do not
        // have sufficient read permissions to list the directory.
        // In that case we still want to provide the closure with a valid
        // entry before passing the error value.
        let readdir = work.read_dir();
        let depth = work.dent.depth();
        let state = self.visitor.visit(Ok(work.dent));
        if !state.is_continue() {
            return state;
        }
        if !descend {
            return WalkState::Skip;
        }

        // The entry has been delivered; now surface any read error.
        let readdir = match readdir {
            Ok(readdir) => readdir,
            Err(err) => {
                return self.visitor.visit(Err(err));
            }
        };

        // Children would exceed the maximum depth, so produce no work.
        if self.max_depth.map_or(false, |max| depth >= max) {
            return WalkState::Skip;
        }
        for result in readdir {
            let state = self.generate_work(
                &work.ignore,
                depth + 1,
                work.root_device,
                result,
            );
            if state.is_quit() {
                return state;
            }
        }
        WalkState::Continue
    }
1502 
1503     /// Decides whether to submit the given directory entry as a file to
1504     /// search.
1505     ///
1506     /// If the entry is a path that should be ignored, then this is a no-op.
1507     /// Otherwise, the entry is pushed on to the queue. (The actual execution
1508     /// of the callback happens in `run_one`.)
1509     ///
1510     /// If an error occurs while reading the entry, then it is sent to the
1511     /// caller's callback.
1512     ///
1513     /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
1514     /// be the depth of this entry. `result` should be the item yielded by
1515     /// a directory iterator.
generate_work( &mut self, ig: &Ignore, depth: usize, root_device: Option<u64>, result: Result<fs::DirEntry, io::Error>, ) -> WalkState1516     fn generate_work(
1517         &mut self,
1518         ig: &Ignore,
1519         depth: usize,
1520         root_device: Option<u64>,
1521         result: Result<fs::DirEntry, io::Error>,
1522     ) -> WalkState {
1523         let fs_dent = match result {
1524             Ok(fs_dent) => fs_dent,
1525             Err(err) => {
1526                 return self
1527                     .visitor
1528                     .visit(Err(Error::from(err).with_depth(depth)));
1529             }
1530         };
1531         let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
1532             Ok(dent) => DirEntry::new_raw(dent, None),
1533             Err(err) => {
1534                 return self.visitor.visit(Err(err));
1535             }
1536         };
1537         let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
1538         if self.follow_links && is_symlink {
1539             let path = dent.path().to_path_buf();
1540             dent = match DirEntryRaw::from_path(depth, path, true) {
1541                 Ok(dent) => DirEntry::new_raw(dent, None),
1542                 Err(err) => {
1543                     return self.visitor.visit(Err(err));
1544                 }
1545             };
1546             if dent.is_dir() {
1547                 if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
1548                     return self.visitor.visit(Err(err));
1549                 }
1550             }
1551         }
1552         if let Some(ref stdout) = self.skip {
1553             let is_stdout = match path_equals(&dent, stdout) {
1554                 Ok(is_stdout) => is_stdout,
1555                 Err(err) => return self.visitor.visit(Err(err)),
1556             };
1557             if is_stdout {
1558                 return WalkState::Continue;
1559             }
1560         }
1561         let should_skip_path = should_skip_entry(ig, &dent);
1562         let should_skip_filesize =
1563             if self.max_filesize.is_some() && !dent.is_dir() {
1564                 skip_filesize(
1565                     self.max_filesize.unwrap(),
1566                     dent.path(),
1567                     &dent.metadata().ok(),
1568                 )
1569             } else {
1570                 false
1571             };
1572         let should_skip_filtered =
1573             if let Some(Filter(predicate)) = &self.filter {
1574                 !predicate(&dent)
1575             } else {
1576                 false
1577             };
1578         if !should_skip_path && !should_skip_filesize && !should_skip_filtered
1579         {
1580             self.send(Work { dent, ignore: ig.clone(), root_device });
1581         }
1582         WalkState::Continue
1583     }
1584 
    /// Returns the next directory to descend into.
    ///
    /// If all work has been exhausted, then this returns None. The worker
    /// should then subsequently quit.
    ///
    /// None is also returned when the `quit_now` flag is set or when a
    /// `Quit` message is popped from the stack. In every case where None is
    /// returned, a `Quit` message is pushed onto the stack first so that
    /// other workers observe it as well.
    fn get_work(&mut self) -> Option<Work> {
        let mut value = self.recv();
        loop {
            // Simulate a priority channel: If quit_now flag is set, we can
            // receive only quit messages.
            if self.is_quit_now() {
                value = Some(Message::Quit)
            }
            match value {
                Some(Message::Work(work)) => {
                    return Some(work);
                }
                Some(Message::Quit) => {
                    // Repeat quit message to wake up sleeping threads, if
                    // any. The domino effect will ensure that every thread
                    // will quit.
                    self.send_quit();
                    return None;
                }
                None => {
                    // Once num_pending reaches 0, it is impossible for it to
                    // ever increase again. Namely, it only reaches 0 once
                    // all jobs have run such that no jobs have produced more
                    // work. We have this guarantee because num_pending is
                    // always incremented before each job is submitted and only
                    // decremented once each job is completely finished.
                    // Therefore, if this reaches zero, then there can be no
                    // other job running.
                    if self.num_pending() == 0 {
                        // Every other thread is blocked at the next recv().
                        // Send the initial quit message and quit.
                        self.send_quit();
                        return None;
                    }
                    // Wait for next `Work` or `Quit` message.
                    // The stack was empty but jobs are still pending, so
                    // poll until something shows up; the outer loop then
                    // re-checks the quit_now flag before acting on it.
                    loop {
                        if let Some(v) = self.recv() {
                            value = Some(v);
                            break;
                        }
                        // Our stack isn't blocking. Instead of burning the
                        // CPU waiting, we let the thread sleep for a bit. In
                        // general, this tends to only occur once the search is
                        // approaching termination.
                        thread::sleep(Duration::from_millis(1));
                    }
                }
            }
        }
    }
1639 
    /// Indicates that all workers should quit immediately.
    ///
    /// This only sets the `quit_now` flag; the flag is read back through
    /// `is_quit_now`.
    fn quit_now(&self) {
        self.quit_now.store(true, Ordering::SeqCst);
    }
1644 
    /// Returns true if this worker should quit immediately, i.e., if the
    /// `quit_now` flag has been set.
    fn is_quit_now(&self) -> bool {
        self.quit_now.load(Ordering::SeqCst)
    }
1649 
    /// Returns the number of pending jobs.
    ///
    /// This is the current value of the `num_pending` counter, which `send`
    /// increments and `work_done` decrements.
    fn num_pending(&self) -> usize {
        self.num_pending.load(Ordering::SeqCst)
    }
1654 
1655     /// Send work.
send(&self, work: Work)1656     fn send(&self, work: Work) {
1657         self.num_pending.fetch_add(1, Ordering::SeqCst);
1658         let mut stack = self.stack.lock().unwrap();
1659         stack.push(Message::Work(work));
1660     }
1661 
1662     /// Send a quit message.
send_quit(&self)1663     fn send_quit(&self) {
1664         let mut stack = self.stack.lock().unwrap();
1665         stack.push(Message::Quit);
1666     }
1667 
1668     /// Receive work.
recv(&self) -> Option<Message>1669     fn recv(&self) -> Option<Message> {
1670         let mut stack = self.stack.lock().unwrap();
1671         stack.pop()
1672     }
1673 
    /// Signal that a unit of work has been completed.
    ///
    /// This decrements the `num_pending` counter that `send` incremented
    /// when the work was submitted.
    fn work_done(&self) {
        self.num_pending.fetch_sub(1, Ordering::SeqCst);
    }
1678 }
1679 
check_symlink_loop( ig_parent: &Ignore, child_path: &Path, child_depth: usize, ) -> Result<(), Error>1680 fn check_symlink_loop(
1681     ig_parent: &Ignore,
1682     child_path: &Path,
1683     child_depth: usize,
1684 ) -> Result<(), Error> {
1685     let hchild = Handle::from_path(child_path).map_err(|err| {
1686         Error::from(err).with_path(child_path).with_depth(child_depth)
1687     })?;
1688     for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
1689         let h = Handle::from_path(ig.path()).map_err(|err| {
1690             Error::from(err).with_path(child_path).with_depth(child_depth)
1691         })?;
1692         if hchild == h {
1693             return Err(Error::Loop {
1694                 ancestor: ig.path().to_path_buf(),
1695                 child: child_path.to_path_buf(),
1696             }
1697             .with_depth(child_depth));
1698         }
1699     }
1700     Ok(())
1701 }
1702 
1703 // Before calling this function, make sure that you ensure that is really
1704 // necessary as the arguments imply a file stat.
skip_filesize( max_filesize: u64, path: &Path, ent: &Option<Metadata>, ) -> bool1705 fn skip_filesize(
1706     max_filesize: u64,
1707     path: &Path,
1708     ent: &Option<Metadata>,
1709 ) -> bool {
1710     let filesize = match *ent {
1711         Some(ref md) => Some(md.len()),
1712         None => None,
1713     };
1714 
1715     if let Some(fs) = filesize {
1716         if fs > max_filesize {
1717             debug!("ignoring {}: {} bytes", path.display(), fs);
1718             true
1719         } else {
1720             false
1721         }
1722     } else {
1723         false
1724     }
1725 }
1726 
should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool1727 fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1728     let m = ig.matched_dir_entry(dent);
1729     if m.is_ignore() {
1730         debug!("ignoring {}: {:?}", dent.path().display(), m);
1731         true
1732     } else if m.is_whitelist() {
1733         debug!("whitelisting {}: {:?}", dent.path().display(), m);
1734         false
1735     } else {
1736         false
1737     }
1738 }
1739 
1740 /// Returns a handle to stdout for filtering search.
1741 ///
1742 /// A handle is returned if and only if stdout is being redirected to a file.
1743 /// The handle returned corresponds to that file.
1744 ///
1745 /// This can be used to ensure that we do not attempt to search a file that we
1746 /// may also be writing to.
stdout_handle() -> Option<Handle>1747 fn stdout_handle() -> Option<Handle> {
1748     let h = match Handle::stdout() {
1749         Err(_) => return None,
1750         Ok(h) => h,
1751     };
1752     let md = match h.as_file().metadata() {
1753         Err(_) => return None,
1754         Ok(md) => md,
1755     };
1756     if !md.is_file() {
1757         return None;
1758     }
1759     Some(h)
1760 }
1761 
1762 /// Returns true if and only if the given directory entry is believed to be
1763 /// equivalent to the given handle. If there was a problem querying the path
1764 /// for information to determine equality, then that error is returned.
path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error>1765 fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1766     #[cfg(unix)]
1767     fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1768         dent.ino() != Some(handle.ino())
1769     }
1770 
1771     #[cfg(not(unix))]
1772     fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1773         false
1774     }
1775 
1776     // If we know for sure that these two things aren't equal, then avoid
1777     // the costly extra stat call to determine equality.
1778     if dent.is_stdin() || never_equal(dent, handle) {
1779         return Ok(false);
1780     }
1781     Handle::from_path(dent.path())
1782         .map(|h| &h == handle)
1783         .map_err(|err| Error::Io(err).with_path(dent.path()))
1784 }
1785 
1786 /// Returns true if the given walkdir entry corresponds to a directory.
1787 ///
1788 /// This is normally just `dent.file_type().is_dir()`, but when we aren't
1789 /// following symlinks, the root directory entry may be a symlink to a
1790 /// directory that we *do* follow---by virtue of it being specified by the user
1791 /// explicitly. In that case, we need to follow the symlink and query whether
1792 /// it's a directory or not. But we only do this for root entries to avoid an
1793 /// additional stat check in most cases.
walkdir_is_dir(dent: &walkdir::DirEntry) -> bool1794 fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
1795     if dent.file_type().is_dir() {
1796         return true;
1797     }
1798     if !dent.file_type().is_symlink() || dent.depth() > 0 {
1799         return false;
1800     }
1801     dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir())
1802 }
1803 
1804 /// Returns true if and only if the given path is on the same device as the
1805 /// given root device.
is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error>1806 fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1807     let dent_device =
1808         device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
1809     Ok(root_device == dent_device)
1810 }
1811 
/// Returns the device number of the file at `path`, as reported by the
/// file's metadata.
#[cfg(unix)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use std::os::unix::fs::MetadataExt;

    let md = path.as_ref().metadata()?;
    Ok(md.dev())
}
1818 
/// Returns the volume serial number of the file at `path`, which serves as
/// the device number on Windows.
#[cfg(windows)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use winapi_util::{file, Handle};

    let handle = Handle::from_path_any(path)?;
    let info = file::information(handle)?;
    Ok(info.volume_serial_number())
}
1826 
/// Fallback for platforms that are neither Unix nor Windows: device numbers
/// are unavailable, so this always fails with an `Other` I/O error.
#[cfg(not(any(unix, windows)))]
fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
    let unsupported = io::Error::new(
        io::ErrorKind::Other,
        "walkdir: same_file_system option not supported on this platform",
    );
    Err(unsupported)
}
1834 
#[cfg(test)]
mod tests {
    use std::ffi::OsStr;
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::Path;
    use std::sync::{Arc, Mutex};

    use super::{DirEntry, WalkBuilder, WalkState};
    use tests::TempDir;

    /// Creates (or truncates) the file at `path` and writes `contents` to it.
    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
        let mut file = File::create(path).unwrap();
        file.write_all(contents.as_bytes()).unwrap();
    }

    /// Creates (or truncates) the file at `path` and sets its length to
    /// `size` bytes.
    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
        let file = File::create(path).unwrap();
        file.set_len(size).unwrap();
    }

    /// Creates a symbolic link at `dst` that points to `src`.
    #[cfg(unix)]
    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
        use std::os::unix::fs::symlink;
        symlink(src, dst).unwrap();
    }

    /// Creates the directory at `path`, including any missing parents.
    fn mkdirp<P: AsRef<Path>>(path: P) {
        fs::create_dir_all(path).unwrap();
    }

    /// Normalizes a path string so that expected paths can always be written
    /// with forward slashes: on Windows, backslashes are replaced with `/`;
    /// everywhere else the input is returned unchanged.
    fn normal_path(unix: &str) -> String {
        if cfg!(windows) {
            unix.replace("\\", "/")
        } else {
            unix.to_string()
        }
    }

    /// Runs the single threaded walker and returns the sorted list of paths
    /// it yielded, relative to `prefix`. Errors and the root entry itself
    /// (whose relative path is empty) are dropped.
    fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
        let mut paths = vec![];
        for result in builder.build() {
            let dent = match result {
                Err(_) => continue,
                Ok(dent) => dent,
            };
            let path = dent.path().strip_prefix(prefix).unwrap();
            if path.as_os_str().is_empty() {
                continue;
            }
            paths.push(normal_path(path.to_str().unwrap()));
        }
        paths.sort();
        paths
    }

    /// Like `walk_collect`, but uses the entries produced by the parallel
    /// walker.
    fn walk_collect_parallel(
        prefix: &Path,
        builder: &WalkBuilder,
    ) -> Vec<String> {
        let mut paths = vec![];
        for dent in walk_collect_entries_parallel(builder) {
            let path = dent.path().strip_prefix(prefix).unwrap();
            if path.as_os_str().is_empty() {
                continue;
            }
            paths.push(normal_path(path.to_str().unwrap()));
        }
        paths.sort();
        paths
    }

    /// Runs the parallel walker and returns every entry it yielded
    /// successfully. Errors are dropped and the walk always continues.
    fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
        // Each visitor closure gets its own clone of the shared list.
        let dents = Arc::new(Mutex::new(vec![]));
        builder.build_parallel().run(|| {
            let dents = dents.clone();
            Box::new(move |result| {
                if let Ok(dent) = result {
                    dents.lock().unwrap().push(dent);
                }
                WalkState::Continue
            })
        });

        let dents = dents.lock().unwrap();
        dents.to_vec()
    }

    /// Converts the given paths to owned strings and sorts them, matching
    /// the ordering produced by the `walk_collect*` helpers.
    fn mkpaths(paths: &[&str]) -> Vec<String> {
        let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
        paths.sort();
        paths
    }

    /// Creates a fresh temporary directory for a test.
    fn tmpdir() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Asserts that both the single threaded and the parallel walker yield
    /// exactly the `expected` paths (relative to `prefix`, in any order).
    fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
        let got = walk_collect(prefix, builder);
        assert_eq!(got, mkpaths(expected), "single threaded");
        let got = walk_collect_parallel(prefix, builder);
        assert_eq!(got, mkpaths(expected), "parallel");
    }

    // With no ignore files present, every file and directory is yielded.
    #[test]
    fn no_ignores() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b/c"));
        mkdirp(td.path().join("x/y"));
        wfile(td.path().join("a/b/foo"), "");
        wfile(td.path().join("x/y/foo"), "");

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path()),
            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
        );
    }

    // A custom ignore file name is honored alongside the standard filters.
    #[test]
    fn custom_ignore() {
        let td = tmpdir();
        let custom_ignore = ".customignore";
        mkdirp(td.path().join("a"));
        wfile(td.path().join(custom_ignore), "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        builder.add_custom_ignore_filename(&custom_ignore);
        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
    }

    // A custom ignore file name is still honored when the other ignore
    // sources are switched off.
    #[test]
    fn custom_ignore_exclusive_use() {
        let td = tmpdir();
        let custom_ignore = ".customignore";
        mkdirp(td.path().join("a"));
        wfile(td.path().join(custom_ignore), "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        builder.ignore(false);
        builder.git_ignore(false);
        builder.git_global(false);
        builder.git_exclude(false);
        builder.add_custom_ignore_filename(&custom_ignore);
        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
    }

    // A `.gitignore` at the root applies to the root and its subdirectories.
    #[test]
    fn gitignore() {
        let td = tmpdir();
        mkdirp(td.path().join(".git"));
        mkdirp(td.path().join("a"));
        wfile(td.path().join(".gitignore"), "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path()),
            &["bar", "a", "a/bar"],
        );
    }

    // An ignore file registered explicitly by path is applied even though
    // its name is not a recognized ignore file name.
    #[test]
    fn explicit_ignore() {
        let td = tmpdir();
        let igpath = td.path().join(".not-an-ignore");
        mkdirp(td.path().join("a"));
        wfile(&igpath, "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        assert!(builder.add_ignore(&igpath).is_none());
        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
    }

    // An explicit ignore file keeps working with the standard filters off.
    // The ignore file itself then shows up in the results.
    #[test]
    fn explicit_ignore_exclusive_use() {
        let td = tmpdir();
        let igpath = td.path().join(".not-an-ignore");
        mkdirp(td.path().join("a"));
        wfile(&igpath, "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        builder.standard_filters(false);
        assert!(builder.add_ignore(&igpath).is_none());
        assert_paths(
            td.path(),
            &builder,
            &[".not-an-ignore", "bar", "a", "a/bar"],
        );
    }

    // A `.gitignore` in a parent of the walk root still applies.
    #[test]
    fn gitignore_parent() {
        let td = tmpdir();
        mkdirp(td.path().join(".git"));
        mkdirp(td.path().join("a"));
        wfile(td.path().join(".gitignore"), "foo");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("a/bar"), "");

        let root = td.path().join("a");
        assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
    }

    // The same builder is reused with progressively larger depth limits.
    #[test]
    fn max_depth() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b/c"));
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("a/b/foo"), "");
        wfile(td.path().join("a/b/c/foo"), "");

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(
            td.path(),
            &builder,
            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
        );
        assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
        assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
        assert_paths(
            td.path(),
            builder.max_depth(Some(2)),
            &["a", "a/b", "foo", "a/foo"],
        );
    }

    // Files strictly larger than the limit are skipped; directories are
    // always yielded regardless of the limit.
    #[test]
    fn max_filesize() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b"));
        wfile_size(td.path().join("foo"), 0);
        wfile_size(td.path().join("bar"), 400);
        wfile_size(td.path().join("baz"), 600);
        wfile_size(td.path().join("a/foo"), 600);
        wfile_size(td.path().join("a/bar"), 500);
        wfile_size(td.path().join("a/baz"), 200);

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(
            td.path(),
            &builder,
            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
        );
        assert_paths(
            td.path(),
            builder.max_filesize(Some(0)),
            &["a", "a/b", "foo"],
        );
        assert_paths(
            td.path(),
            builder.max_filesize(Some(500)),
            &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
        );
        assert_paths(
            td.path(),
            builder.max_filesize(Some(50000)),
            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
        );
    }

    // A symlinked directory is yielded as a plain entry by default and is
    // descended into only when following links.
    #[cfg(unix)] // because symlinks on windows are weird
    #[test]
    fn symlinks() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b"));
        symlink(td.path().join("a/b"), td.path().join("z"));
        wfile(td.path().join("a/b/foo"), "");

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
        assert_paths(
            td.path(),
            &builder.follow_links(true),
            &["a", "a/b", "a/b/foo", "z", "z/foo"],
        );
    }

    // A root that is a regular directory must not be reported as a symlink,
    // by either the single threaded or the parallel walker.
    #[cfg(unix)] // because symlinks on windows are weird
    #[test]
    fn first_path_not_symlink() {
        let td = tmpdir();
        mkdirp(td.path().join("foo"));

        let dents = WalkBuilder::new(td.path().join("foo"))
            .build()
            .into_iter()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(1, dents.len());
        assert!(!dents[0].path_is_symlink());

        let dents = walk_collect_entries_parallel(&WalkBuilder::new(
            td.path().join("foo"),
        ));
        assert_eq!(1, dents.len());
        assert!(!dents[0].path_is_symlink());
    }

    // `a/b/c` links back to its ancestor `a`. When following links the loop
    // is reported as an error, so `a/b/c` is absent from the results.
    #[cfg(unix)] // because symlinks on windows are weird
    #[test]
    fn symlink_loop() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b"));
        symlink(td.path().join("a"), td.path().join("a/b/c"));

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
        assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
    }

    // It's a little tricky to test the 'same_file_system' option since
    // we need an environment with more than one file system. We adopt a
    // heuristic where /sys is typically a distinct volume on Linux and roll
    // with that.
    #[test]
    #[cfg(target_os = "linux")]
    fn same_file_system() {
        use super::device_num;

        // If for some reason /sys doesn't exist or isn't a directory, just
        // skip this test.
        if !Path::new("/sys").is_dir() {
            return;
        }

        // If our test directory actually isn't a different volume from /sys,
        // then this test is meaningless and we shouldn't run it.
        let td = tmpdir();
        if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
            return;
        }

        mkdirp(td.path().join("same_file"));
        symlink("/sys", td.path().join("same_file").join("alink"));

        // Create a symlink to sys and enable following symlinks. If the
        // same_file_system option doesn't work, then this probably will hit a
        // permission error. Otherwise, it should just skip over the symlink
        // completely.
        let mut builder = WalkBuilder::new(td.path());
        builder.follow_links(true).same_file_system(true);
        assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
    }

    // A directory that cannot be read is still yielded as an entry itself.
    #[cfg(target_os = "linux")]
    #[test]
    fn no_read_permissions() {
        let dir_path = Path::new("/root");

        // There's no /root directory, skip the test.
        if !dir_path.is_dir() {
            return;
        }
        // We're the root, so the test won't check what we want it to.
        if fs::read_dir(&dir_path).is_ok() {
            return;
        }

        // Check that we can't descend but get an entry for the parent dir.
        let builder = WalkBuilder::new(&dir_path);
        assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
    }

    // Entries rejected by `filter_entry` are not yielded, and a rejected
    // directory is not descended into.
    #[test]
    fn filter() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b/c"));
        mkdirp(td.path().join("x/y"));
        wfile(td.path().join("a/b/foo"), "");
        wfile(td.path().join("x/y/foo"), "");

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path()),
            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
        );

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path())
                .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
            &["x", "x/y", "x/y/foo"],
        );
    }
}
2242