1 use std::cmp;
2 use std::ffi::OsStr;
3 use std::fmt;
4 use std::fs::{self, FileType, Metadata};
5 use std::io;
6 use std::path::{Path, PathBuf};
7 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
8 use std::sync::{Arc, Mutex};
9 use std::thread;
10 use std::time::Duration;
11 use std::vec;
12 
13 use same_file::Handle;
14 use walkdir::{self, WalkDir};
15 
16 use crate::dir::{Ignore, IgnoreBuilder};
17 use crate::gitignore::GitignoreBuilder;
18 use crate::overrides::Override;
19 use crate::types::Types;
20 use crate::{Error, PartialErrorBuilder};
21 
22 /// A directory entry with a possible error attached.
23 ///
24 /// The error typically refers to a problem parsing ignore files in a
25 /// particular directory.
26 #[derive(Clone, Debug)]
27 pub struct DirEntry {
28     dent: DirEntryInner,
29     err: Option<Error>,
30 }
31 
32 impl DirEntry {
33     /// The full path that this entry represents.
path(&self) -> &Path34     pub fn path(&self) -> &Path {
35         self.dent.path()
36     }
37 
38     /// The full path that this entry represents.
39     /// Analogous to [`path`], but moves ownership of the path.
40     ///
41     /// [`path`]: struct.DirEntry.html#method.path
into_path(self) -> PathBuf42     pub fn into_path(self) -> PathBuf {
43         self.dent.into_path()
44     }
45 
46     /// Whether this entry corresponds to a symbolic link or not.
path_is_symlink(&self) -> bool47     pub fn path_is_symlink(&self) -> bool {
48         self.dent.path_is_symlink()
49     }
50 
51     /// Returns true if and only if this entry corresponds to stdin.
52     ///
53     /// i.e., The entry has depth 0 and its file name is `-`.
is_stdin(&self) -> bool54     pub fn is_stdin(&self) -> bool {
55         self.dent.is_stdin()
56     }
57 
58     /// Return the metadata for the file that this entry points to.
metadata(&self) -> Result<Metadata, Error>59     pub fn metadata(&self) -> Result<Metadata, Error> {
60         self.dent.metadata()
61     }
62 
63     /// Return the file type for the file that this entry points to.
64     ///
65     /// This entry doesn't have a file type if it corresponds to stdin.
file_type(&self) -> Option<FileType>66     pub fn file_type(&self) -> Option<FileType> {
67         self.dent.file_type()
68     }
69 
70     /// Return the file name of this entry.
71     ///
72     /// If this entry has no file name (e.g., `/`), then the full path is
73     /// returned.
file_name(&self) -> &OsStr74     pub fn file_name(&self) -> &OsStr {
75         self.dent.file_name()
76     }
77 
78     /// Returns the depth at which this entry was created relative to the root.
depth(&self) -> usize79     pub fn depth(&self) -> usize {
80         self.dent.depth()
81     }
82 
83     /// Returns the underlying inode number if one exists.
84     ///
85     /// If this entry doesn't have an inode number, then `None` is returned.
86     #[cfg(unix)]
ino(&self) -> Option<u64>87     pub fn ino(&self) -> Option<u64> {
88         self.dent.ino()
89     }
90 
91     /// Returns an error, if one exists, associated with processing this entry.
92     ///
93     /// An example of an error is one that occurred while parsing an ignore
94     /// file. Errors related to traversing a directory tree itself are reported
95     /// as part of yielding the directory entry, and not with this method.
error(&self) -> Option<&Error>96     pub fn error(&self) -> Option<&Error> {
97         self.err.as_ref()
98     }
99 
100     /// Returns true if and only if this entry points to a directory.
is_dir(&self) -> bool101     pub(crate) fn is_dir(&self) -> bool {
102         self.dent.is_dir()
103     }
104 
new_stdin() -> DirEntry105     fn new_stdin() -> DirEntry {
106         DirEntry { dent: DirEntryInner::Stdin, err: None }
107     }
108 
new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry109     fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
110         DirEntry { dent: DirEntryInner::Walkdir(dent), err: err }
111     }
112 
new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry113     fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
114         DirEntry { dent: DirEntryInner::Raw(dent), err: err }
115     }
116 }
117 
118 /// DirEntryInner is the implementation of DirEntry.
119 ///
120 /// It specifically represents three distinct sources of directory entries:
121 ///
122 /// 1. From the walkdir crate.
123 /// 2. Special entries that represent things like stdin.
124 /// 3. From a path.
125 ///
126 /// Specifically, (3) has to essentially re-create the DirEntry implementation
127 /// from WalkDir.
128 #[derive(Clone, Debug)]
129 enum DirEntryInner {
130     Stdin,
131     Walkdir(walkdir::DirEntry),
132     Raw(DirEntryRaw),
133 }
134 
135 impl DirEntryInner {
path(&self) -> &Path136     fn path(&self) -> &Path {
137         use self::DirEntryInner::*;
138         match *self {
139             Stdin => Path::new("<stdin>"),
140             Walkdir(ref x) => x.path(),
141             Raw(ref x) => x.path(),
142         }
143     }
144 
into_path(self) -> PathBuf145     fn into_path(self) -> PathBuf {
146         use self::DirEntryInner::*;
147         match self {
148             Stdin => PathBuf::from("<stdin>"),
149             Walkdir(x) => x.into_path(),
150             Raw(x) => x.into_path(),
151         }
152     }
153 
path_is_symlink(&self) -> bool154     fn path_is_symlink(&self) -> bool {
155         use self::DirEntryInner::*;
156         match *self {
157             Stdin => false,
158             Walkdir(ref x) => x.path_is_symlink(),
159             Raw(ref x) => x.path_is_symlink(),
160         }
161     }
162 
is_stdin(&self) -> bool163     fn is_stdin(&self) -> bool {
164         match *self {
165             DirEntryInner::Stdin => true,
166             _ => false,
167         }
168     }
169 
metadata(&self) -> Result<Metadata, Error>170     fn metadata(&self) -> Result<Metadata, Error> {
171         use self::DirEntryInner::*;
172         match *self {
173             Stdin => {
174                 let err = Error::Io(io::Error::new(
175                     io::ErrorKind::Other,
176                     "<stdin> has no metadata",
177                 ));
178                 Err(err.with_path("<stdin>"))
179             }
180             Walkdir(ref x) => x.metadata().map_err(|err| {
181                 Error::Io(io::Error::from(err)).with_path(x.path())
182             }),
183             Raw(ref x) => x.metadata(),
184         }
185     }
186 
file_type(&self) -> Option<FileType>187     fn file_type(&self) -> Option<FileType> {
188         use self::DirEntryInner::*;
189         match *self {
190             Stdin => None,
191             Walkdir(ref x) => Some(x.file_type()),
192             Raw(ref x) => Some(x.file_type()),
193         }
194     }
195 
file_name(&self) -> &OsStr196     fn file_name(&self) -> &OsStr {
197         use self::DirEntryInner::*;
198         match *self {
199             Stdin => OsStr::new("<stdin>"),
200             Walkdir(ref x) => x.file_name(),
201             Raw(ref x) => x.file_name(),
202         }
203     }
204 
depth(&self) -> usize205     fn depth(&self) -> usize {
206         use self::DirEntryInner::*;
207         match *self {
208             Stdin => 0,
209             Walkdir(ref x) => x.depth(),
210             Raw(ref x) => x.depth(),
211         }
212     }
213 
214     #[cfg(unix)]
ino(&self) -> Option<u64>215     fn ino(&self) -> Option<u64> {
216         use self::DirEntryInner::*;
217         use walkdir::DirEntryExt;
218         match *self {
219             Stdin => None,
220             Walkdir(ref x) => Some(x.ino()),
221             Raw(ref x) => Some(x.ino()),
222         }
223     }
224 
225     /// Returns true if and only if this entry points to a directory.
is_dir(&self) -> bool226     fn is_dir(&self) -> bool {
227         self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
228     }
229 }
230 
/// A directory entry this crate builds on its own.
///
/// It is essentially a copy of the walkdir crate's entry type, which lets
/// the parallel iterator create `DirEntry`s from whole cloth.
#[derive(Clone)]
struct DirEntryRaw {
    /// The path exactly as the `fs::ReadDir` iterator reported it, even
    /// when it is a symbolic link.
    path: PathBuf,
    /// The file type. Recursive iteration needs it, so it is stored.
    ty: FileType,
    /// Set when the entry was created from a symbolic link and the user
    /// expects the iterator to follow symbolic links.
    follow_link: bool,
    /// The depth, relative to the root, at which this entry was generated.
    depth: usize,
    /// The underlying inode number (Unix only).
    #[cfg(unix)]
    ino: u64,
    /// The underlying metadata (Windows only). It is kept on Windows
    /// because it comes for free while reading a directory.
    #[cfg(windows)]
    metadata: fs::Metadata,
}
253 
254 impl fmt::Debug for DirEntryRaw {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result255     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256         // Leaving out FileType because it doesn't have a debug impl
257         // in Rust 1.9. We could add it if we really wanted to by manually
258         // querying each possibly file type. Meh. ---AG
259         f.debug_struct("DirEntryRaw")
260             .field("path", &self.path)
261             .field("follow_link", &self.follow_link)
262             .field("depth", &self.depth)
263             .finish()
264     }
265 }
266 
267 impl DirEntryRaw {
path(&self) -> &Path268     fn path(&self) -> &Path {
269         &self.path
270     }
271 
into_path(self) -> PathBuf272     fn into_path(self) -> PathBuf {
273         self.path
274     }
275 
path_is_symlink(&self) -> bool276     fn path_is_symlink(&self) -> bool {
277         self.ty.is_symlink() || self.follow_link
278     }
279 
metadata(&self) -> Result<Metadata, Error>280     fn metadata(&self) -> Result<Metadata, Error> {
281         self.metadata_internal()
282     }
283 
284     #[cfg(windows)]
metadata_internal(&self) -> Result<fs::Metadata, Error>285     fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
286         if self.follow_link {
287             fs::metadata(&self.path)
288         } else {
289             Ok(self.metadata.clone())
290         }
291         .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
292     }
293 
294     #[cfg(not(windows))]
metadata_internal(&self) -> Result<fs::Metadata, Error>295     fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
296         if self.follow_link {
297             fs::metadata(&self.path)
298         } else {
299             fs::symlink_metadata(&self.path)
300         }
301         .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
302     }
303 
file_type(&self) -> FileType304     fn file_type(&self) -> FileType {
305         self.ty
306     }
307 
file_name(&self) -> &OsStr308     fn file_name(&self) -> &OsStr {
309         self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
310     }
311 
depth(&self) -> usize312     fn depth(&self) -> usize {
313         self.depth
314     }
315 
316     #[cfg(unix)]
ino(&self) -> u64317     fn ino(&self) -> u64 {
318         self.ino
319     }
320 
from_entry( depth: usize, ent: &fs::DirEntry, ) -> Result<DirEntryRaw, Error>321     fn from_entry(
322         depth: usize,
323         ent: &fs::DirEntry,
324     ) -> Result<DirEntryRaw, Error> {
325         let ty = ent.file_type().map_err(|err| {
326             let err = Error::Io(io::Error::from(err)).with_path(ent.path());
327             Error::WithDepth { depth: depth, err: Box::new(err) }
328         })?;
329         DirEntryRaw::from_entry_os(depth, ent, ty)
330     }
331 
332     #[cfg(windows)]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>333     fn from_entry_os(
334         depth: usize,
335         ent: &fs::DirEntry,
336         ty: fs::FileType,
337     ) -> Result<DirEntryRaw, Error> {
338         let md = ent.metadata().map_err(|err| {
339             let err = Error::Io(io::Error::from(err)).with_path(ent.path());
340             Error::WithDepth { depth: depth, err: Box::new(err) }
341         })?;
342         Ok(DirEntryRaw {
343             path: ent.path(),
344             ty: ty,
345             follow_link: false,
346             depth: depth,
347             metadata: md,
348         })
349     }
350 
351     #[cfg(unix)]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>352     fn from_entry_os(
353         depth: usize,
354         ent: &fs::DirEntry,
355         ty: fs::FileType,
356     ) -> Result<DirEntryRaw, Error> {
357         use std::os::unix::fs::DirEntryExt;
358 
359         Ok(DirEntryRaw {
360             path: ent.path(),
361             ty: ty,
362             follow_link: false,
363             depth: depth,
364             ino: ent.ino(),
365         })
366     }
367 
368     // Placeholder implementation to allow compiling on non-standard platforms
369     // (e.g. wasm32).
370     #[cfg(not(any(windows, unix)))]
from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result<DirEntryRaw, Error>371     fn from_entry_os(
372         depth: usize,
373         ent: &fs::DirEntry,
374         ty: fs::FileType,
375     ) -> Result<DirEntryRaw, Error> {
376         Err(Error::Io(io::Error::new(
377             io::ErrorKind::Other,
378             "unsupported platform",
379         )))
380     }
381 
382     #[cfg(windows)]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>383     fn from_path(
384         depth: usize,
385         pb: PathBuf,
386         link: bool,
387     ) -> Result<DirEntryRaw, Error> {
388         let md =
389             fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
390         Ok(DirEntryRaw {
391             path: pb,
392             ty: md.file_type(),
393             follow_link: link,
394             depth: depth,
395             metadata: md,
396         })
397     }
398 
399     #[cfg(unix)]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>400     fn from_path(
401         depth: usize,
402         pb: PathBuf,
403         link: bool,
404     ) -> Result<DirEntryRaw, Error> {
405         use std::os::unix::fs::MetadataExt;
406 
407         let md =
408             fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
409         Ok(DirEntryRaw {
410             path: pb,
411             ty: md.file_type(),
412             follow_link: link,
413             depth: depth,
414             ino: md.ino(),
415         })
416     }
417 
418     // Placeholder implementation to allow compiling on non-standard platforms
419     // (e.g. wasm32).
420     #[cfg(not(any(windows, unix)))]
from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result<DirEntryRaw, Error>421     fn from_path(
422         depth: usize,
423         pb: PathBuf,
424         link: bool,
425     ) -> Result<DirEntryRaw, Error> {
426         Err(Error::Io(io::Error::new(
427             io::ErrorKind::Other,
428             "unsupported platform",
429         )))
430     }
431 }
432 
433 /// WalkBuilder builds a recursive directory iterator.
434 ///
435 /// The builder supports a large number of configurable options. This includes
436 /// specific glob overrides, file type matching, toggling whether hidden
437 /// files are ignored or not, and of course, support for respecting gitignore
438 /// files.
439 ///
440 /// By default, all ignore files found are respected. This includes `.ignore`,
441 /// `.gitignore`, `.git/info/exclude` and even your global gitignore
442 /// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
443 ///
444 /// Some standard recursive directory options are also supported, such as
445 /// limiting the recursive depth or whether to follow symbolic links (disabled
446 /// by default).
447 ///
448 /// # Ignore rules
449 ///
450 /// There are many rules that influence whether a particular file or directory
451 /// is skipped by this iterator. Those rules are documented here. Note that
452 /// the rules assume a default configuration.
453 ///
454 /// * First, glob overrides are checked. If a path matches a glob override,
455 /// then matching stops. The path is then only skipped if the glob that matched
456 /// the path is an ignore glob. (An override glob is a whitelist glob unless it
457 /// starts with a `!`, in which case it is an ignore glob.)
458 /// * Second, ignore files are checked. Ignore files currently only come from
459 /// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
460 /// global gitignore file), plain `.ignore` files, which have the same format
461 /// as gitignore files, or explicitly added ignore files. The precedence order
462 /// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
463 /// finally explicitly added ignore files. Note that precedence between
464 /// different types of ignore files is not impacted by the directory hierarchy;
465 /// any `.ignore` file overrides all `.gitignore` files. Within each precedence
466 /// level, more nested ignore files have a higher precedence than less nested
467 /// ignore files.
468 /// * Third, if the previous step yields an ignore match, then all matching
469 /// is stopped and the path is skipped. If it yields a whitelist match, then
470 /// matching continues. A whitelist match can be overridden by a later matcher.
471 /// * Fourth, unless the path is a directory, the file type matcher is run on
472 /// the path. As above, if it yields an ignore match, then all matching is
473 /// stopped and the path is skipped. If it yields a whitelist match, then
474 /// matching continues.
475 /// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
476 /// path is skipped.
477 /// * Sixth, unless the path is a directory, the size of the file is compared
478 /// against the max filesize limit. If it exceeds the limit, it is skipped.
479 /// * Seventh, if the path has made it this far then it is yielded in the
480 /// iterator.
481 #[derive(Clone)]
482 pub struct WalkBuilder {
483     paths: Vec<PathBuf>,
484     ig_builder: IgnoreBuilder,
485     max_depth: Option<usize>,
486     max_filesize: Option<u64>,
487     follow_links: bool,
488     same_file_system: bool,
489     sorter: Option<Sorter>,
490     threads: usize,
491     skip: Option<Arc<Handle>>,
492     filter: Option<Filter>,
493 }
494 
/// A shareable comparison function used to order directory entries.
#[derive(Clone)]
enum Sorter {
    /// Compares two entries by their file names.
    ByName(
        Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>,
    ),
    /// Compares two entries by their paths.
    ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
}
502 
503 #[derive(Clone)]
504 struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
505 
506 impl fmt::Debug for WalkBuilder {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result507     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
508         f.debug_struct("WalkBuilder")
509             .field("paths", &self.paths)
510             .field("ig_builder", &self.ig_builder)
511             .field("max_depth", &self.max_depth)
512             .field("max_filesize", &self.max_filesize)
513             .field("follow_links", &self.follow_links)
514             .field("threads", &self.threads)
515             .field("skip", &self.skip)
516             .finish()
517     }
518 }
519 
520 impl WalkBuilder {
521     /// Create a new builder for a recursive directory iterator for the
522     /// directory given.
523     ///
524     /// Note that if you want to traverse multiple different directories, it
525     /// is better to call `add` on this builder than to create multiple
526     /// `Walk` values.
new<P: AsRef<Path>>(path: P) -> WalkBuilder527     pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
528         WalkBuilder {
529             paths: vec![path.as_ref().to_path_buf()],
530             ig_builder: IgnoreBuilder::new(),
531             max_depth: None,
532             max_filesize: None,
533             follow_links: false,
534             same_file_system: false,
535             sorter: None,
536             threads: 0,
537             skip: None,
538             filter: None,
539         }
540     }
541 
542     /// Build a new `Walk` iterator.
build(&self) -> Walk543     pub fn build(&self) -> Walk {
544         let follow_links = self.follow_links;
545         let max_depth = self.max_depth;
546         let sorter = self.sorter.clone();
547         let its = self
548             .paths
549             .iter()
550             .map(move |p| {
551                 if p == Path::new("-") {
552                     (p.to_path_buf(), None)
553                 } else {
554                     let mut wd = WalkDir::new(p);
555                     wd = wd.follow_links(follow_links || p.is_file());
556                     wd = wd.same_file_system(self.same_file_system);
557                     if let Some(max_depth) = max_depth {
558                         wd = wd.max_depth(max_depth);
559                     }
560                     if let Some(ref sorter) = sorter {
561                         match sorter.clone() {
562                             Sorter::ByName(cmp) => {
563                                 wd = wd.sort_by(move |a, b| {
564                                     cmp(a.file_name(), b.file_name())
565                                 });
566                             }
567                             Sorter::ByPath(cmp) => {
568                                 wd = wd.sort_by(move |a, b| {
569                                     cmp(a.path(), b.path())
570                                 });
571                             }
572                         }
573                     }
574                     (p.to_path_buf(), Some(WalkEventIter::from(wd)))
575                 }
576             })
577             .collect::<Vec<_>>()
578             .into_iter();
579         let ig_root = self.ig_builder.build();
580         Walk {
581             its: its,
582             it: None,
583             ig_root: ig_root.clone(),
584             ig: ig_root.clone(),
585             max_filesize: self.max_filesize,
586             skip: self.skip.clone(),
587             filter: self.filter.clone(),
588         }
589     }
590 
591     /// Build a new `WalkParallel` iterator.
592     ///
593     /// Note that this *doesn't* return something that implements `Iterator`.
594     /// Instead, the returned value must be run with a closure. e.g.,
595     /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
build_parallel(&self) -> WalkParallel596     pub fn build_parallel(&self) -> WalkParallel {
597         WalkParallel {
598             paths: self.paths.clone().into_iter(),
599             ig_root: self.ig_builder.build(),
600             max_depth: self.max_depth,
601             max_filesize: self.max_filesize,
602             follow_links: self.follow_links,
603             same_file_system: self.same_file_system,
604             threads: self.threads,
605             skip: self.skip.clone(),
606             filter: self.filter.clone(),
607         }
608     }
609 
610     /// Add a file path to the iterator.
611     ///
612     /// Each additional file path added is traversed recursively. This should
613     /// be preferred over building multiple `Walk` iterators since this
614     /// enables reusing resources across iteration.
add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder615     pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
616         self.paths.push(path.as_ref().to_path_buf());
617         self
618     }
619 
620     /// The maximum depth to recurse.
621     ///
622     /// The default, `None`, imposes no depth restriction.
max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder623     pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
624         self.max_depth = depth;
625         self
626     }
627 
628     /// Whether to follow symbolic links or not.
follow_links(&mut self, yes: bool) -> &mut WalkBuilder629     pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
630         self.follow_links = yes;
631         self
632     }
633 
634     /// Whether to ignore files above the specified limit.
max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder635     pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
636         self.max_filesize = filesize;
637         self
638     }
639 
640     /// The number of threads to use for traversal.
641     ///
642     /// Note that this only has an effect when using `build_parallel`.
643     ///
644     /// The default setting is `0`, which chooses the number of threads
645     /// automatically using heuristics.
threads(&mut self, n: usize) -> &mut WalkBuilder646     pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
647         self.threads = n;
648         self
649     }
650 
651     /// Add a global ignore file to the matcher.
652     ///
653     /// This has lower precedence than all other sources of ignore rules.
654     ///
655     /// If there was a problem adding the ignore file, then an error is
656     /// returned. Note that the error may indicate *partial* failure. For
657     /// example, if an ignore file contains an invalid glob, all other globs
658     /// are still applied.
add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error>659     pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
660         let mut builder = GitignoreBuilder::new("");
661         let mut errs = PartialErrorBuilder::default();
662         errs.maybe_push(builder.add(path));
663         match builder.build() {
664             Ok(gi) => {
665                 self.ig_builder.add_ignore(gi);
666             }
667             Err(err) => {
668                 errs.push(err);
669             }
670         }
671         errs.into_error_option()
672     }
673 
674     /// Add a custom ignore file name
675     ///
676     /// These ignore files have higher precedence than all other ignore files.
677     ///
678     /// When specifying multiple names, earlier names have lower precedence than
679     /// later names.
add_custom_ignore_filename<S: AsRef<OsStr>>( &mut self, file_name: S, ) -> &mut WalkBuilder680     pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
681         &mut self,
682         file_name: S,
683     ) -> &mut WalkBuilder {
684         self.ig_builder.add_custom_ignore_filename(file_name);
685         self
686     }
687 
688     /// Add an override matcher.
689     ///
690     /// By default, no override matcher is used.
691     ///
692     /// This overrides any previous setting.
overrides(&mut self, overrides: Override) -> &mut WalkBuilder693     pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
694         self.ig_builder.overrides(overrides);
695         self
696     }
697 
698     /// Add a file type matcher.
699     ///
700     /// By default, no file type matcher is used.
701     ///
702     /// This overrides any previous setting.
types(&mut self, types: Types) -> &mut WalkBuilder703     pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
704         self.ig_builder.types(types);
705         self
706     }
707 
708     /// Enables all the standard ignore filters.
709     ///
710     /// This toggles, as a group, all the filters that are enabled by default:
711     ///
712     /// - [hidden()](#method.hidden)
713     /// - [parents()](#method.parents)
714     /// - [ignore()](#method.ignore)
715     /// - [git_ignore()](#method.git_ignore)
716     /// - [git_global()](#method.git_global)
717     /// - [git_exclude()](#method.git_exclude)
718     ///
719     /// They may still be toggled individually after calling this function.
720     ///
721     /// This is (by definition) enabled by default.
standard_filters(&mut self, yes: bool) -> &mut WalkBuilder722     pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
723         self.hidden(yes)
724             .parents(yes)
725             .ignore(yes)
726             .git_ignore(yes)
727             .git_global(yes)
728             .git_exclude(yes)
729     }
730 
731     /// Enables ignoring hidden files.
732     ///
733     /// This is enabled by default.
hidden(&mut self, yes: bool) -> &mut WalkBuilder734     pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
735         self.ig_builder.hidden(yes);
736         self
737     }
738 
739     /// Enables reading ignore files from parent directories.
740     ///
741     /// If this is enabled, then .gitignore files in parent directories of each
742     /// file path given are respected. Otherwise, they are ignored.
743     ///
744     /// This is enabled by default.
parents(&mut self, yes: bool) -> &mut WalkBuilder745     pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
746         self.ig_builder.parents(yes);
747         self
748     }
749 
750     /// Enables reading `.ignore` files.
751     ///
752     /// `.ignore` files have the same semantics as `gitignore` files and are
753     /// supported by search tools such as ripgrep and The Silver Searcher.
754     ///
755     /// This is enabled by default.
ignore(&mut self, yes: bool) -> &mut WalkBuilder756     pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
757         self.ig_builder.ignore(yes);
758         self
759     }
760 
761     /// Enables reading a global gitignore file, whose path is specified in
762     /// git's `core.excludesFile` config option.
763     ///
764     /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
765     /// does not exist or does not specify `core.excludesFile`, then
766     /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
767     /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
768     ///
769     /// This is enabled by default.
git_global(&mut self, yes: bool) -> &mut WalkBuilder770     pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
771         self.ig_builder.git_global(yes);
772         self
773     }
774 
775     /// Enables reading `.gitignore` files.
776     ///
777     /// `.gitignore` files have match semantics as described in the `gitignore`
778     /// man page.
779     ///
780     /// This is enabled by default.
git_ignore(&mut self, yes: bool) -> &mut WalkBuilder781     pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
782         self.ig_builder.git_ignore(yes);
783         self
784     }
785 
786     /// Enables reading `.git/info/exclude` files.
787     ///
788     /// `.git/info/exclude` files have match semantics as described in the
789     /// `gitignore` man page.
790     ///
791     /// This is enabled by default.
git_exclude(&mut self, yes: bool) -> &mut WalkBuilder792     pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
793         self.ig_builder.git_exclude(yes);
794         self
795     }
796 
797     /// Whether a git repository is required to apply git-related ignore
798     /// rules (global rules, .gitignore and local exclude rules).
799     ///
800     /// When disabled, git-related ignore rules are applied even when searching
801     /// outside a git repository.
require_git(&mut self, yes: bool) -> &mut WalkBuilder802     pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
803         self.ig_builder.require_git(yes);
804         self
805     }
806 
807     /// Process ignore files case insensitively
808     ///
809     /// This is disabled by default.
ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder810     pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
811         self.ig_builder.ignore_case_insensitive(yes);
812         self
813     }
814 
815     /// Set a function for sorting directory entries by their path.
816     ///
817     /// If a compare function is set, the resulting iterator will return all
818     /// paths in sorted order. The compare function will be called to compare
819     /// entries from the same directory.
820     ///
821     /// This is like `sort_by_file_name`, except the comparator accepts
822     /// a `&Path` instead of the base file name, which permits it to sort by
823     /// more criteria.
824     ///
825     /// This method will override any previous sorter set by this method or
826     /// by `sort_by_file_name`.
827     ///
828     /// Note that this is not used in the parallel iterator.
sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,829     pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
830     where
831         F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
832     {
833         self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
834         self
835     }
836 
837     /// Set a function for sorting directory entries by file name.
838     ///
839     /// If a compare function is set, the resulting iterator will return all
840     /// paths in sorted order. The compare function will be called to compare
841     /// names from entries from the same directory using only the name of the
842     /// entry.
843     ///
844     /// This method will override any previous sorter set by this method or
845     /// by `sort_by_file_path`.
846     ///
847     /// Note that this is not used in the parallel iterator.
sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,848     pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
849     where
850         F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
851     {
852         self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
853         self
854     }
855 
856     /// Do not cross file system boundaries.
857     ///
858     /// When this option is enabled, directory traversal will not descend into
859     /// directories that are on a different file system from the root path.
860     ///
861     /// Currently, this option is only supported on Unix and Windows. If this
862     /// option is used on an unsupported platform, then directory traversal
863     /// will immediately return an error and will not yield any entries.
same_file_system(&mut self, yes: bool) -> &mut WalkBuilder864     pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
865         self.same_file_system = yes;
866         self
867     }
868 
869     /// Do not yield directory entries that are believed to correspond to
870     /// stdout.
871     ///
872     /// This is useful when a command is invoked via shell redirection to a
873     /// file that is also being read. For example, `grep -r foo ./ > results`
874     /// might end up trying to search `results` even though it is also writing
875     /// to it, which could cause an unbounded feedback loop. Setting this
876     /// option prevents this from happening by skipping over the `results`
877     /// file.
878     ///
879     /// This is disabled by default.
skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder880     pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
881         if yes {
882             self.skip = stdout_handle().map(Arc::new);
883         } else {
884             self.skip = None;
885         }
886         self
887     }
888 
889     /// Yields only entries which satisfy the given predicate and skips
890     /// descending into directories that do not satisfy the given predicate.
891     ///
892     /// The predicate is applied to all entries. If the predicate is
893     /// true, iteration carries on as normal. If the predicate is false, the
894     /// entry is ignored and if it is a directory, it is not descended into.
895     ///
896     /// Note that the errors for reading entries that may not satisfy the
897     /// predicate will still be yielded.
filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder where P: Fn(&DirEntry) -> bool + Send + Sync + 'static,898     pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
899     where
900         P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
901     {
902         self.filter = Some(Filter(Arc::new(filter)));
903         self
904     }
905 }
906 
/// Walk is a recursive directory iterator over file paths in one or more
/// directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and their precedence are explained in the documentation for `WalkBuilder`.
pub struct Walk {
    /// The root paths that have not been started yet, each paired with the
    /// event iterator that walks it. A root without an iterator is yielded
    /// as the stdin entry.
    its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
    /// The event iterator for the root currently being walked, if any.
    it: Option<WalkEventIter>,
    /// The base ignore matcher from which the matcher for each root is
    /// derived.
    ig_root: Ignore,
    /// The ignore matcher for the directory currently being walked. A child
    /// matcher is pushed when a directory is entered and the parent is
    /// restored when the directory is exited.
    ig: Ignore,
    /// When set, the size limit handed to `skip_filesize` for every entry
    /// that is not a directory.
    max_filesize: Option<u64>,
    /// When set, entries that `path_equals` reports as equal to this handle
    /// are skipped.
    skip: Option<Arc<Handle>>,
    /// When set, entries for which this predicate returns false are skipped.
    filter: Option<Filter>,
}
922 
923 impl Walk {
924     /// Creates a new recursive directory iterator for the file path given.
925     ///
926     /// Note that this uses default settings, which include respecting
927     /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
928     /// instead.
new<P: AsRef<Path>>(path: P) -> Walk929     pub fn new<P: AsRef<Path>>(path: P) -> Walk {
930         WalkBuilder::new(path).build()
931     }
932 
skip_entry(&self, ent: &DirEntry) -> Result<bool, Error>933     fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
934         if ent.depth() == 0 {
935             return Ok(false);
936         }
937         // We ensure that trivial skipping is done before any other potentially
938         // expensive operations (stat, filesystem other) are done. This seems
939         // like an obvious optimization but becomes critical when filesystem
940         // operations even as simple as stat can result in significant
941         // overheads; an example of this was a bespoke filesystem layer in
942         // Windows that hosted files remotely and would download them on-demand
943         // when particular filesystem operations occurred. Users of this system
944         // who ensured correct file-type fileters were being used could still
945         // get unnecessary file access resulting in large downloads.
946         if should_skip_entry(&self.ig, ent) {
947             return Ok(true);
948         }
949         if let Some(ref stdout) = self.skip {
950             if path_equals(ent, stdout)? {
951                 return Ok(true);
952             }
953         }
954         if self.max_filesize.is_some() && !ent.is_dir() {
955             return Ok(skip_filesize(
956                 self.max_filesize.unwrap(),
957                 ent.path(),
958                 &ent.metadata().ok(),
959             ));
960         }
961         if let Some(Filter(filter)) = &self.filter {
962             if !filter(ent) {
963                 return Ok(true);
964             }
965         }
966         Ok(false)
967     }
968 }
969 
impl Iterator for Walk {
    type Item = Result<DirEntry, Error>;

    /// Advances the walk and returns the next entry that is not skipped.
    ///
    /// Events are drained from the iterator of the current root. Once it is
    /// exhausted, the next root is started. `None` is returned when no roots
    /// remain. Errors from the underlying walk, from loading parent ignore
    /// rules and from `skip_entry` are yielded as `Err` items.
    #[inline(always)]
    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
        loop {
            // Pull the next event from the current root's iterator, if there
            // is one.
            let ev = match self.it.as_mut().and_then(|it| it.next()) {
                Some(ev) => ev,
                None => {
                    // Nothing left for the current root (or no root has been
                    // started yet), so move on to the next root.
                    match self.its.next() {
                        None => return None,
                        // A root without an event iterator is yielded as the
                        // stdin entry.
                        Some((_, None)) => {
                            return Some(Ok(DirEntry::new_stdin()));
                        }
                        Some((path, Some(it))) => {
                            self.it = Some(it);
                            // For a directory root, build the matcher from
                            // the ignore rules of its parents. Any other root
                            // starts from a copy of the base matcher.
                            if path.is_dir() {
                                let (ig, err) = self.ig_root.add_parents(path);
                                self.ig = ig;
                                if let Some(err) = err {
                                    return Some(Err(err));
                                }
                            } else {
                                self.ig = self.ig_root.clone();
                            }
                        }
                    }
                    continue;
                }
            };
            match ev {
                Err(err) => {
                    return Some(Err(Error::from_walkdir(err)));
                }
                Ok(WalkEvent::Exit) => {
                    // Leaving a directory: restore the parent's matcher.
                    // NOTE(review): the unwrap assumes every Exit event is
                    // paired with an earlier `add_child` push - confirm.
                    self.ig = self.ig.parent().unwrap();
                }
                Ok(WalkEvent::Dir(ent)) => {
                    let mut ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        self.it.as_mut().unwrap().it.skip_current_dir();
                        // Still need to push this on the stack because
                        // we'll get a WalkEvent::Exit event for this dir.
                        // We don't care if it errors though.
                        let (igtmp, _) = self.ig.add_child(ent.path());
                        self.ig = igtmp;
                        continue;
                    }
                    // Entering the directory: push its matcher and attach
                    // any error from building it to the yielded entry.
                    let (igtmp, err) = self.ig.add_child(ent.path());
                    self.ig = igtmp;
                    ent.err = err;
                    return Some(Ok(ent));
                }
                Ok(WalkEvent::File(ent)) => {
                    let ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        continue;
                    }
                    return Some(Ok(ent));
                }
            }
        }
    }
}
1042 
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
    /// The depth the iterator is currently at. It is set to the depth of
    /// each yielded entry, raised by one after a directory is yielded and
    /// lowered by one for every exit event.
    depth: usize,
    /// The underlying walkdir iterator.
    it: walkdir::IntoIter,
    /// An item already read from `it` that is held back while exit events
    /// are emitted. It is consumed before `it` is polled again.
    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
}
1052 
/// The events emitted by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
    /// A directory was encountered. Its contents, if any, follow.
    Dir(walkdir::DirEntry),
    /// An entry that is not a directory was encountered.
    File(walkdir::DirEntry),
    /// The contents of a previously emitted directory have been exhausted.
    Exit,
}
1059 
1060 impl From<WalkDir> for WalkEventIter {
from(it: WalkDir) -> WalkEventIter1061     fn from(it: WalkDir) -> WalkEventIter {
1062         WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1063     }
1064 }
1065 
1066 impl Iterator for WalkEventIter {
1067     type Item = walkdir::Result<WalkEvent>;
1068 
1069     #[inline(always)]
next(&mut self) -> Option<walkdir::Result<WalkEvent>>1070     fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1071         let dent = self.next.take().or_else(|| self.it.next());
1072         let depth = match dent {
1073             None => 0,
1074             Some(Ok(ref dent)) => dent.depth(),
1075             Some(Err(ref err)) => err.depth(),
1076         };
1077         if depth < self.depth {
1078             self.depth -= 1;
1079             self.next = dent;
1080             return Some(Ok(WalkEvent::Exit));
1081         }
1082         self.depth = depth;
1083         match dent {
1084             None => None,
1085             Some(Err(err)) => Some(Err(err)),
1086             Some(Ok(dent)) => {
1087                 if walkdir_is_dir(&dent) {
1088                     self.depth += 1;
1089                     Some(Ok(WalkEvent::Dir(dent)))
1090                 } else {
1091                     Some(Ok(WalkEvent::File(dent)))
1092                 }
1093             }
1094         }
1095     }
1096 }
1097 
/// WalkState is returned by visitors of the parallel recursive directory
/// iterator to say whether walking should continue as normal, skip
/// descending into a particular directory or quit the walk entirely.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WalkState {
    /// Continue walking as normal.
    Continue,
    /// If the directory entry given is a directory, don't descend into it.
    /// In all other cases, this has no effect.
    Skip,
    /// Quit the entire iterator as soon as possible.
    ///
    /// Note that this is an inherently asynchronous action. It is possible
    /// for more entries to be yielded even after instructing the iterator
    /// to quit.
    Quit,
}

impl WalkState {
    /// Returns true if and only if this state is `WalkState::Continue`.
    fn is_continue(&self) -> bool {
        match *self {
            WalkState::Continue => true,
            WalkState::Skip | WalkState::Quit => false,
        }
    }

    /// Returns true if and only if this state is `WalkState::Quit`.
    fn is_quit(&self) -> bool {
        match *self {
            WalkState::Quit => true,
            WalkState::Continue | WalkState::Skip => false,
        }
    }
}
1125 
/// A builder for constructing a visitor when using
/// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder
/// will be called for each thread started by `WalkParallel`. The visitor
/// returned from each builder is then called for every directory entry.
///
/// The `'s` lifetime bounds the visitors this builder produces.
pub trait ParallelVisitorBuilder<'s> {
    /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
    ///
    /// Each call returns a new boxed visitor.
    fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
}
1134 
1135 impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1136     for &'a mut P
1137 {
build(&mut self) -> Box<dyn ParallelVisitor + 's>1138     fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1139         (**self).build()
1140     }
1141 }
1142 
/// Receives files and directories for the current thread.
///
/// Setup for the traversal can be implemented as part of
/// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build).
/// Teardown when traversal finishes can be implemented by implementing the
/// `Drop` trait on your traversal type.
pub trait ParallelVisitor: Send {
    /// Receives files and directories for the current thread. This is called
    /// once for every directory entry visited by traversal.
    ///
    /// `entry` is either a directory entry or an error encountered during
    /// traversal. The returned `WalkState` tells the traversal whether to
    /// continue, skip descending into the entry or quit.
    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
}
1154 
/// Adapts a closure that produces visitor closures into a
/// `ParallelVisitorBuilder`.
struct FnBuilder<F> {
    /// The closure called to create a new visitor closure.
    builder: F,
}
1158 
1159 impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1160     for FnBuilder<F>
1161 {
build(&mut self) -> Box<dyn ParallelVisitor + 's>1162     fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1163         let visitor = (self.builder)();
1164         Box::new(FnVisitorImp { visitor })
1165     }
1166 }
1167 
/// A boxed visitor closure. It is called with a directory entry or an error
/// and returns the `WalkState` that tells the traversal how to proceed.
type FnVisitor<'s> =
    Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1170 
/// Adapts a boxed visitor closure into a `ParallelVisitor`.
struct FnVisitorImp<'s> {
    /// The closure called for every entry passed to `visit`.
    visitor: FnVisitor<'s>,
}
1174 
1175 impl<'s> ParallelVisitor for FnVisitorImp<'s> {
visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState1176     fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1177         (self.visitor)(entry)
1178     }
1179 }
1180 
/// WalkParallel is a parallel recursive directory iterator over file paths
/// in one or more directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and their precedence are explained in the documentation for `WalkBuilder`.
///
/// Unlike `Walk`, this uses multiple threads for traversing a directory.
pub struct WalkParallel {
    /// The root paths to traverse. A path equal to `-` is treated as stdin.
    paths: vec::IntoIter<PathBuf>,
    /// The base ignore matcher. A clone is attached to every root work item.
    ig_root: Ignore,
    /// The maximum file size, handed to every worker.
    max_filesize: Option<u64>,
    /// The maximum depth, handed to every worker.
    max_depth: Option<usize>,
    /// Whether workers follow symbolic links.
    follow_links: bool,
    /// When true, the device number of each root path is looked up and
    /// recorded on the root's work item.
    same_file_system: bool,
    /// The number of worker threads to start. A value of `0` results in two
    /// threads being used.
    threads: usize,
    /// The file handle to skip, handed to every worker.
    skip: Option<Arc<Handle>>,
    /// The entry filter predicate, handed to every worker.
    filter: Option<Filter>,
}
1200 
1201 impl WalkParallel {
1202     /// Execute the parallel recursive directory iterator. `mkf` is called
1203     /// for each thread used for iteration. The function produced by `mkf`
1204     /// is then in turn called for each visited file path.
run<'s, F>(self, mkf: F) where F: FnMut() -> FnVisitor<'s>,1205     pub fn run<'s, F>(self, mkf: F)
1206     where
1207         F: FnMut() -> FnVisitor<'s>,
1208     {
1209         self.visit(&mut FnBuilder { builder: mkf })
1210     }
1211 
1212     /// Execute the parallel recursive directory iterator using a custom
1213     /// visitor.
1214     ///
1215     /// The builder given is used to construct a visitor for every thread
1216     /// used by this traversal. The visitor returned from each builder is then
1217     /// called for every directory entry seen by that thread.
1218     ///
1219     /// Typically, creating a custom visitor is useful if you need to perform
1220     /// some kind of cleanup once traversal is finished. This can be achieved
1221     /// by implementing `Drop` for your builder (or for your visitor, if you
1222     /// want to execute cleanup for every thread that is launched).
1223     ///
1224     /// For example, each visitor might build up a data structure of results
1225     /// corresponding to the directory entries seen for each thread. Since each
1226     /// visitor runs on only one thread, this build-up can be done without
1227     /// synchronization. Then, once traversal is complete, all of the results
1228     /// can be merged together into a single data structure.
visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>)1229     pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
1230         let threads = self.threads();
1231         let stack = Arc::new(Mutex::new(vec![]));
1232         {
1233             let mut stack = stack.lock().unwrap();
1234             let mut visitor = builder.build();
1235             let mut paths = Vec::new().into_iter();
1236             std::mem::swap(&mut paths, &mut self.paths);
1237             // Send the initial set of root paths to the pool of workers. Note
1238             // that we only send directories. For files, we send to them the
1239             // callback directly.
1240             for path in paths {
1241                 let (dent, root_device) = if path == Path::new("-") {
1242                     (DirEntry::new_stdin(), None)
1243                 } else {
1244                     let root_device = if !self.same_file_system {
1245                         None
1246                     } else {
1247                         match device_num(&path) {
1248                             Ok(root_device) => Some(root_device),
1249                             Err(err) => {
1250                                 let err = Error::Io(err).with_path(path);
1251                                 if visitor.visit(Err(err)).is_quit() {
1252                                     return;
1253                                 }
1254                                 continue;
1255                             }
1256                         }
1257                     };
1258                     match DirEntryRaw::from_path(0, path, false) {
1259                         Ok(dent) => {
1260                             (DirEntry::new_raw(dent, None), root_device)
1261                         }
1262                         Err(err) => {
1263                             if visitor.visit(Err(err)).is_quit() {
1264                                 return;
1265                             }
1266                             continue;
1267                         }
1268                     }
1269                 };
1270                 stack.push(Message::Work(Work {
1271                     dent: dent,
1272                     ignore: self.ig_root.clone(),
1273                     root_device: root_device,
1274                 }));
1275             }
1276             // ... but there's no need to start workers if we don't need them.
1277             if stack.is_empty() {
1278                 return;
1279             }
1280         }
1281         // Create the workers and then wait for them to finish.
1282         let quit_now = Arc::new(AtomicBool::new(false));
1283         let num_pending =
1284             Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
1285         crossbeam_utils::thread::scope(|s| {
1286             let mut handles = vec![];
1287             for _ in 0..threads {
1288                 let worker = Worker {
1289                     visitor: builder.build(),
1290                     stack: stack.clone(),
1291                     quit_now: quit_now.clone(),
1292                     num_pending: num_pending.clone(),
1293                     max_depth: self.max_depth,
1294                     max_filesize: self.max_filesize,
1295                     follow_links: self.follow_links,
1296                     skip: self.skip.clone(),
1297                     filter: self.filter.clone(),
1298                 };
1299                 handles.push(s.spawn(|_| worker.run()));
1300             }
1301             for handle in handles {
1302                 handle.join().unwrap();
1303             }
1304         })
1305         .unwrap(); // Pass along panics from threads
1306     }
1307 
threads(&self) -> usize1308     fn threads(&self) -> usize {
1309         if self.threads == 0 {
1310             2
1311         } else {
1312             self.threads
1313         }
1314     }
1315 }
1316 
/// Message is the set of instructions that a worker knows how to process.
enum Message {
    /// A work item is an entry for a worker to process. Directories are
    /// descended into; any other entry is passed straight to the visitor.
    /// Work items for entries that should be skipped or ignored should not
    /// be produced.
    Work(Work),
    /// This instruction indicates that the worker should quit.
    Quit,
}
1326 
/// A unit of work for each worker to process.
///
/// A unit of work is usually a directory that should be descended into. A
/// unit that is not a directory (or is a symlink) is passed to the visitor
/// without being descended into.
struct Work {
    /// The directory entry.
    dent: DirEntry,
    /// Any ignore matchers that have been built for this directory's parents.
    ignore: Ignore,
    /// The root device number. When present, only files with the same device
    /// number should be considered.
    root_device: Option<u64>,
}
1340 
1341 impl Work {
1342     /// Returns true if and only if this work item is a directory.
is_dir(&self) -> bool1343     fn is_dir(&self) -> bool {
1344         self.dent.is_dir()
1345     }
1346 
1347     /// Returns true if and only if this work item is a symlink.
is_symlink(&self) -> bool1348     fn is_symlink(&self) -> bool {
1349         self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1350     }
1351 
1352     /// Adds ignore rules for parent directories.
1353     ///
1354     /// Note that this only applies to entries at depth 0. On all other
1355     /// entries, this is a no-op.
add_parents(&mut self) -> Option<Error>1356     fn add_parents(&mut self) -> Option<Error> {
1357         if self.dent.depth() > 0 {
1358             return None;
1359         }
1360         // At depth 0, the path of this entry is a root path, so we can
1361         // use it directly to add parent ignore rules.
1362         let (ig, err) = self.ignore.add_parents(self.dent.path());
1363         self.ignore = ig;
1364         err
1365     }
1366 
1367     /// Reads the directory contents of this work item and adds ignore
1368     /// rules for this directory.
1369     ///
1370     /// If there was a problem with reading the directory contents, then
1371     /// an error is returned. If there was a problem reading the ignore
1372     /// rules for this directory, then the error is attached to this
1373     /// work item's directory entry.
read_dir(&mut self) -> Result<fs::ReadDir, Error>1374     fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1375         let readdir = match fs::read_dir(self.dent.path()) {
1376             Ok(readdir) => readdir,
1377             Err(err) => {
1378                 let err = Error::from(err)
1379                     .with_path(self.dent.path())
1380                     .with_depth(self.dent.depth());
1381                 return Err(err);
1382             }
1383         };
1384         let (ig, err) = self.ignore.add_child(self.dent.path());
1385         self.ignore = ig;
1386         self.dent.err = err;
1387         Ok(readdir)
1388     }
1389 }
1390 
/// A worker is responsible for descending into directories, updating the
/// ignore matchers, producing new work and invoking the caller's callback.
///
/// Note that a worker is *both* a producer and a consumer.
struct Worker<'s> {
    /// The caller's callback.
    visitor: Box<dyn ParallelVisitor + 's>,
    /// A stack of work to do.
    ///
    /// We use a stack instead of a channel because a stack lets us visit
    /// directories in depth first order. This can substantially reduce peak
    /// memory usage by keeping both the number of file paths and gitignore
    /// matchers in memory lower.
    stack: Arc<Mutex<Vec<Message>>>,
    /// Whether all workers should terminate at the next opportunity. Note
    /// that we need this because we don't want other `Work` to be done after
    /// we quit. We wouldn't need this if we had a priority channel.
    quit_now: Arc<AtomicBool>,
    /// The number of outstanding work items.
    num_pending: Arc<AtomicUsize>,
    /// The maximum depth of directories to descend. A value of `0` means no
    /// descension at all.
    max_depth: Option<usize>,
    /// The maximum size a searched file can be (in bytes). If a file exceeds
    /// this size it will be skipped.
    max_filesize: Option<u64>,
    /// Whether to follow symbolic links or not. When this is enabled, loop
    /// detection is performed.
    follow_links: bool,
    /// A file handle to skip, currently is either `None` or stdout, if it's
    /// a file and it has been requested to skip files identical to stdout.
    skip: Option<Arc<Handle>>,
    /// A predicate applied to dir entries. If it returns false, the entry
    /// and all of its children will be skipped.
    filter: Option<Filter>,
}
1427 
1428 impl<'s> Worker<'s> {
1429     /// Runs this worker until there is no more work left to do.
1430     ///
1431     /// The worker will call the caller's callback for all entries that aren't
1432     /// skipped by the ignore matcher.
run(mut self)1433     fn run(mut self) {
1434         while let Some(work) = self.get_work() {
1435             if let WalkState::Quit = self.run_one(work) {
1436                 self.quit_now();
1437             }
1438             self.work_done();
1439         }
1440     }
1441 
run_one(&mut self, mut work: Work) -> WalkState1442     fn run_one(&mut self, mut work: Work) -> WalkState {
1443         // If the work is not a directory, then we can just execute the
1444         // caller's callback immediately and move on.
1445         if work.is_symlink() || !work.is_dir() {
1446             return self.visitor.visit(Ok(work.dent));
1447         }
1448         if let Some(err) = work.add_parents() {
1449             let state = self.visitor.visit(Err(err));
1450             if state.is_quit() {
1451                 return state;
1452             }
1453         }
1454 
1455         let descend = if let Some(root_device) = work.root_device {
1456             match is_same_file_system(root_device, work.dent.path()) {
1457                 Ok(true) => true,
1458                 Ok(false) => false,
1459                 Err(err) => {
1460                     let state = self.visitor.visit(Err(err));
1461                     if state.is_quit() {
1462                         return state;
1463                     }
1464                     false
1465                 }
1466             }
1467         } else {
1468             true
1469         };
1470 
1471         // Try to read the directory first before we transfer ownership
1472         // to the provided closure. Do not unwrap it immediately, though,
1473         // as we may receive an `Err` value e.g. in the case when we do not
1474         // have sufficient read permissions to list the directory.
1475         // In that case we still want to provide the closure with a valid
1476         // entry before passing the error value.
1477         let readdir = work.read_dir();
1478         let depth = work.dent.depth();
1479         let state = self.visitor.visit(Ok(work.dent));
1480         if !state.is_continue() {
1481             return state;
1482         }
1483         if !descend {
1484             return WalkState::Skip;
1485         }
1486 
1487         let readdir = match readdir {
1488             Ok(readdir) => readdir,
1489             Err(err) => {
1490                 return self.visitor.visit(Err(err));
1491             }
1492         };
1493 
1494         if self.max_depth.map_or(false, |max| depth >= max) {
1495             return WalkState::Skip;
1496         }
1497         for result in readdir {
1498             let state = self.generate_work(
1499                 &work.ignore,
1500                 depth + 1,
1501                 work.root_device,
1502                 result,
1503             );
1504             if state.is_quit() {
1505                 return state;
1506             }
1507         }
1508         WalkState::Continue
1509     }
1510 
1511     /// Decides whether to submit the given directory entry as a file to
1512     /// search.
1513     ///
1514     /// If the entry is a path that should be ignored, then this is a no-op.
1515     /// Otherwise, the entry is pushed on to the queue. (The actual execution
1516     /// of the callback happens in `run_one`.)
1517     ///
1518     /// If an error occurs while reading the entry, then it is sent to the
1519     /// caller's callback.
1520     ///
1521     /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
1522     /// be the depth of this entry. `result` should be the item yielded by
1523     /// a directory iterator.
generate_work( &mut self, ig: &Ignore, depth: usize, root_device: Option<u64>, result: Result<fs::DirEntry, io::Error>, ) -> WalkState1524     fn generate_work(
1525         &mut self,
1526         ig: &Ignore,
1527         depth: usize,
1528         root_device: Option<u64>,
1529         result: Result<fs::DirEntry, io::Error>,
1530     ) -> WalkState {
1531         let fs_dent = match result {
1532             Ok(fs_dent) => fs_dent,
1533             Err(err) => {
1534                 return self
1535                     .visitor
1536                     .visit(Err(Error::from(err).with_depth(depth)));
1537             }
1538         };
1539         let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
1540             Ok(dent) => DirEntry::new_raw(dent, None),
1541             Err(err) => {
1542                 return self.visitor.visit(Err(err));
1543             }
1544         };
1545         let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
1546         if self.follow_links && is_symlink {
1547             let path = dent.path().to_path_buf();
1548             dent = match DirEntryRaw::from_path(depth, path, true) {
1549                 Ok(dent) => DirEntry::new_raw(dent, None),
1550                 Err(err) => {
1551                     return self.visitor.visit(Err(err));
1552                 }
1553             };
1554             if dent.is_dir() {
1555                 if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
1556                     return self.visitor.visit(Err(err));
1557                 }
1558             }
1559         }
1560         // N.B. See analogous call in the single-threaded implementation about
1561         // why it's important for this to come before the checks below.
1562         if should_skip_entry(ig, &dent) {
1563             return WalkState::Continue;
1564         }
1565         if let Some(ref stdout) = self.skip {
1566             let is_stdout = match path_equals(&dent, stdout) {
1567                 Ok(is_stdout) => is_stdout,
1568                 Err(err) => return self.visitor.visit(Err(err)),
1569             };
1570             if is_stdout {
1571                 return WalkState::Continue;
1572             }
1573         }
1574         let should_skip_filesize =
1575             if self.max_filesize.is_some() && !dent.is_dir() {
1576                 skip_filesize(
1577                     self.max_filesize.unwrap(),
1578                     dent.path(),
1579                     &dent.metadata().ok(),
1580                 )
1581             } else {
1582                 false
1583             };
1584         let should_skip_filtered =
1585             if let Some(Filter(predicate)) = &self.filter {
1586                 !predicate(&dent)
1587             } else {
1588                 false
1589             };
1590         if !should_skip_filesize && !should_skip_filtered {
1591             self.send(Work { dent, ignore: ig.clone(), root_device });
1592         }
1593         WalkState::Continue
1594     }
1595 
    /// Returns the next directory to descend into.
    ///
    /// If all work has been exhausted, then this returns None. The worker
    /// should then subsequently quit.
    ///
    /// `None` is also returned as soon as the `quit_now` flag is observed or
    /// a `Quit` message is popped. In every case where `None` is returned, a
    /// `Quit` message is pushed first so that other workers can see it too.
    ///
    /// When the stack is empty but jobs are still pending, this polls the
    /// stack, sleeping for one millisecond between attempts, until a message
    /// shows up.
    fn get_work(&mut self) -> Option<Work> {
        let mut value = self.recv();
        loop {
            // Simulate a priority channel: If quit_now flag is set, we can
            // receive only quit messages.
            if self.is_quit_now() {
                value = Some(Message::Quit)
            }
            match value {
                Some(Message::Work(work)) => {
                    return Some(work);
                }
                Some(Message::Quit) => {
                    // Repeat quit message to wake up sleeping threads, if
                    // any. The domino effect will ensure that every thread
                    // will quit.
                    self.send_quit();
                    return None;
                }
                None => {
                    // Once num_pending reaches 0, it is impossible for it to
                    // ever increase again. Namely, it only reaches 0 once
                    // all jobs have run such that no jobs have produced more
                    // work. We have this guarantee because num_pending is
                    // always incremented before each job is submitted and only
                    // decremented once each job is completely finished.
                    // Therefore, if this reaches zero, then there can be no
                    // other job running.
                    if self.num_pending() == 0 {
                        // Every other thread is blocked at the next recv().
                        // Send the initial quit message and quit.
                        self.send_quit();
                        return None;
                    }
                    // Wait for next `Work` or `Quit` message.
                    loop {
                        if let Some(v) = self.recv() {
                            // Hand the message to the outer loop, which
                            // re-checks the quit_now flag before acting on it.
                            value = Some(v);
                            break;
                        }
                        // Our stack isn't blocking. Instead of burning the
                        // CPU waiting, we let the thread sleep for a bit. In
                        // general, this tends to only occur once the search is
                        // approaching termination.
                        thread::sleep(Duration::from_millis(1));
                    }
                }
            }
        }
    }
1650 
    /// Indicates that all workers should quit immediately.
    ///
    /// This only raises the `quit_now` flag; workers notice it the next time
    /// they call `is_quit_now`.
    fn quit_now(&self) {
        self.quit_now.store(true, Ordering::SeqCst);
    }
1655 
    /// Returns true if this worker should quit immediately.
    ///
    /// This reads the `quit_now` flag that `quit_now()` sets.
    fn is_quit_now(&self) -> bool {
        self.quit_now.load(Ordering::SeqCst)
    }
1660 
    /// Returns the number of pending jobs.
    ///
    /// The counter is incremented by `send` and decremented by `work_done`.
    fn num_pending(&self) -> usize {
        self.num_pending.load(Ordering::SeqCst)
    }
1665 
1666     /// Send work.
send(&self, work: Work)1667     fn send(&self, work: Work) {
1668         self.num_pending.fetch_add(1, Ordering::SeqCst);
1669         let mut stack = self.stack.lock().unwrap();
1670         stack.push(Message::Work(work));
1671     }
1672 
1673     /// Send a quit message.
send_quit(&self)1674     fn send_quit(&self) {
1675         let mut stack = self.stack.lock().unwrap();
1676         stack.push(Message::Quit);
1677     }
1678 
1679     /// Receive work.
recv(&self) -> Option<Message>1680     fn recv(&self) -> Option<Message> {
1681         let mut stack = self.stack.lock().unwrap();
1682         stack.pop()
1683     }
1684 
    /// Signal that a unit of received work has been finished.
    ///
    /// This decrements the pending-job counter that `send` increments.
    fn work_done(&self) {
        self.num_pending.fetch_sub(1, Ordering::SeqCst);
    }
1689 }
1690 
check_symlink_loop( ig_parent: &Ignore, child_path: &Path, child_depth: usize, ) -> Result<(), Error>1691 fn check_symlink_loop(
1692     ig_parent: &Ignore,
1693     child_path: &Path,
1694     child_depth: usize,
1695 ) -> Result<(), Error> {
1696     let hchild = Handle::from_path(child_path).map_err(|err| {
1697         Error::from(err).with_path(child_path).with_depth(child_depth)
1698     })?;
1699     for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
1700         let h = Handle::from_path(ig.path()).map_err(|err| {
1701             Error::from(err).with_path(child_path).with_depth(child_depth)
1702         })?;
1703         if hchild == h {
1704             return Err(Error::Loop {
1705                 ancestor: ig.path().to_path_buf(),
1706                 child: child_path.to_path_buf(),
1707             }
1708             .with_depth(child_depth));
1709         }
1710     }
1711     Ok(())
1712 }
1713 
1714 // Before calling this function, make sure that you ensure that is really
1715 // necessary as the arguments imply a file stat.
skip_filesize( max_filesize: u64, path: &Path, ent: &Option<Metadata>, ) -> bool1716 fn skip_filesize(
1717     max_filesize: u64,
1718     path: &Path,
1719     ent: &Option<Metadata>,
1720 ) -> bool {
1721     let filesize = match *ent {
1722         Some(ref md) => Some(md.len()),
1723         None => None,
1724     };
1725 
1726     if let Some(fs) = filesize {
1727         if fs > max_filesize {
1728             log::debug!("ignoring {}: {} bytes", path.display(), fs);
1729             true
1730         } else {
1731             false
1732         }
1733     } else {
1734         false
1735     }
1736 }
1737 
should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool1738 fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1739     let m = ig.matched_dir_entry(dent);
1740     if m.is_ignore() {
1741         log::debug!("ignoring {}: {:?}", dent.path().display(), m);
1742         true
1743     } else if m.is_whitelist() {
1744         log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
1745         false
1746     } else {
1747         false
1748     }
1749 }
1750 
1751 /// Returns a handle to stdout for filtering search.
1752 ///
1753 /// A handle is returned if and only if stdout is being redirected to a file.
1754 /// The handle returned corresponds to that file.
1755 ///
1756 /// This can be used to ensure that we do not attempt to search a file that we
1757 /// may also be writing to.
stdout_handle() -> Option<Handle>1758 fn stdout_handle() -> Option<Handle> {
1759     let h = match Handle::stdout() {
1760         Err(_) => return None,
1761         Ok(h) => h,
1762     };
1763     let md = match h.as_file().metadata() {
1764         Err(_) => return None,
1765         Ok(md) => md,
1766     };
1767     if !md.is_file() {
1768         return None;
1769     }
1770     Some(h)
1771 }
1772 
1773 /// Returns true if and only if the given directory entry is believed to be
1774 /// equivalent to the given handle. If there was a problem querying the path
1775 /// for information to determine equality, then that error is returned.
path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error>1776 fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1777     #[cfg(unix)]
1778     fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1779         dent.ino() != Some(handle.ino())
1780     }
1781 
1782     #[cfg(not(unix))]
1783     fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1784         false
1785     }
1786 
1787     // If we know for sure that these two things aren't equal, then avoid
1788     // the costly extra stat call to determine equality.
1789     if dent.is_stdin() || never_equal(dent, handle) {
1790         return Ok(false);
1791     }
1792     Handle::from_path(dent.path())
1793         .map(|h| &h == handle)
1794         .map_err(|err| Error::Io(err).with_path(dent.path()))
1795 }
1796 
1797 /// Returns true if the given walkdir entry corresponds to a directory.
1798 ///
1799 /// This is normally just `dent.file_type().is_dir()`, but when we aren't
1800 /// following symlinks, the root directory entry may be a symlink to a
1801 /// directory that we *do* follow---by virtue of it being specified by the user
1802 /// explicitly. In that case, we need to follow the symlink and query whether
1803 /// it's a directory or not. But we only do this for root entries to avoid an
1804 /// additional stat check in most cases.
walkdir_is_dir(dent: &walkdir::DirEntry) -> bool1805 fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
1806     if dent.file_type().is_dir() {
1807         return true;
1808     }
1809     if !dent.file_type().is_symlink() || dent.depth() > 0 {
1810         return false;
1811     }
1812     dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir())
1813 }
1814 
1815 /// Returns true if and only if the given path is on the same device as the
1816 /// given root device.
is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error>1817 fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1818     let dent_device =
1819         device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
1820     Ok(root_device == dent_device)
1821 }
1822 
/// Returns the ID of the device containing `path`, as reported by the
/// path's metadata.
///
/// Fails with the underlying I/O error if the metadata cannot be read.
#[cfg(unix)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use std::os::unix::fs::MetadataExt;

    let metadata = path.as_ref().metadata()?;
    Ok(metadata.dev())
}
1829 
/// Returns the volume serial number of the volume containing `path`.
///
/// Fails with the underlying I/O error if the path cannot be opened or its
/// file information cannot be queried.
#[cfg(windows)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use winapi_util::{file, Handle};

    let handle = Handle::from_path_any(path)?;
    let info = file::information(handle)?;
    Ok(info.volume_serial_number())
}
1837 
/// Fallback for platforms that are neither Unix nor Windows: device numbers
/// are unavailable, so this always returns an error.
#[cfg(not(any(unix, windows)))]
fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
    let unsupported = io::Error::new(
        io::ErrorKind::Other,
        "walkdir: same_file_system option not supported on this platform",
    );
    Err(unsupported)
}
1845 
// Tests for the directory walkers. Most tests build a small directory tree
// in a temporary directory and check the set of paths yielded by both the
// single-threaded and the parallel walker.
#[cfg(test)]
mod tests {
    use std::ffi::OsStr;
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::Path;
    use std::sync::{Arc, Mutex};

    use super::{DirEntry, WalkBuilder, WalkState};
    use crate::tests::TempDir;

    /// Creates (or truncates) the file at `path` and writes `contents` to it.
    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
        let mut file = File::create(path).unwrap();
        file.write_all(contents.as_bytes()).unwrap();
    }

    /// Creates (or truncates) the file at `path` and sets its length to
    /// `size` bytes.
    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
        let file = File::create(path).unwrap();
        file.set_len(size).unwrap();
    }

    /// Creates a symbolic link at `dst` that points to `src`.
    #[cfg(unix)]
    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
        use std::os::unix::fs::symlink;
        symlink(src, dst).unwrap();
    }

    /// Creates the directory `path` along with any missing parents.
    fn mkdirp<P: AsRef<Path>>(path: P) {
        fs::create_dir_all(path).unwrap();
    }

    /// Normalizes a path string for comparison against the `/`-separated
    /// expectations used by the tests: on Windows every backslash becomes a
    /// forward slash, elsewhere the string is returned unchanged.
    fn normal_path(unix: &str) -> String {
        if cfg!(windows) {
            unix.replace("\\", "/")
        } else {
            unix.to_string()
        }
    }

    /// Runs the single-threaded walker and returns the sorted paths of all
    /// successfully yielded entries, relative to `prefix`.
    ///
    /// Error results and the entry for `prefix` itself are dropped.
    fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
        let mut paths = vec![];
        for result in builder.build() {
            let dent = match result {
                Err(_) => continue,
                Ok(dent) => dent,
            };
            let path = dent.path().strip_prefix(prefix).unwrap();
            if path.as_os_str().is_empty() {
                continue;
            }
            paths.push(normal_path(path.to_str().unwrap()));
        }
        paths.sort();
        paths
    }

    /// Same as `walk_collect`, but driven by the parallel walker.
    fn walk_collect_parallel(
        prefix: &Path,
        builder: &WalkBuilder,
    ) -> Vec<String> {
        let mut paths = vec![];
        for dent in walk_collect_entries_parallel(builder) {
            let path = dent.path().strip_prefix(prefix).unwrap();
            if path.as_os_str().is_empty() {
                continue;
            }
            paths.push(normal_path(path.to_str().unwrap()));
        }
        paths.sort();
        paths
    }

    /// Runs the parallel walker and returns every successfully yielded
    /// entry. Error results are dropped, and the order of the returned
    /// entries is whatever order the callbacks happened to push them in.
    fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
        let dents = Arc::new(Mutex::new(vec![]));
        builder.build_parallel().run(|| {
            // Each callback gets its own handle to the shared vector.
            let dents = dents.clone();
            Box::new(move |result| {
                if let Ok(dent) = result {
                    dents.lock().unwrap().push(dent);
                }
                WalkState::Continue
            })
        });

        let dents = dents.lock().unwrap();
        dents.to_vec()
    }

    /// Converts the expected paths to owned strings and sorts them, so they
    /// can be compared against the sorted output of the collectors.
    fn mkpaths(paths: &[&str]) -> Vec<String> {
        let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
        paths.sort();
        paths
    }

    /// Creates a fresh temporary directory for a test.
    fn tmpdir() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Asserts that both the single-threaded and the parallel walker built
    /// from `builder` yield exactly the `expected` paths (relative to
    /// `prefix`, in any order).
    fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
        let got = walk_collect(prefix, builder);
        assert_eq!(got, mkpaths(expected), "single threaded");
        let got = walk_collect_parallel(prefix, builder);
        assert_eq!(got, mkpaths(expected), "parallel");
    }

    // With no ignore files present, every directory and file is yielded.
    #[test]
    fn no_ignores() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b/c"));
        mkdirp(td.path().join("x/y"));
        wfile(td.path().join("a/b/foo"), "");
        wfile(td.path().join("x/y/foo"), "");

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path()),
            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
        );
    }

    // A custom ignore file name is honored alongside the default filters.
    #[test]
    fn custom_ignore() {
        let td = tmpdir();
        let custom_ignore = ".customignore";
        mkdirp(td.path().join("a"));
        wfile(td.path().join(custom_ignore), "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        builder.add_custom_ignore_filename(&custom_ignore);
        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
    }

    // A custom ignore file name is still honored when the ignore and git
    // ignore sources are all switched off.
    #[test]
    fn custom_ignore_exclusive_use() {
        let td = tmpdir();
        let custom_ignore = ".customignore";
        mkdirp(td.path().join("a"));
        wfile(td.path().join(custom_ignore), "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        builder.ignore(false);
        builder.git_ignore(false);
        builder.git_global(false);
        builder.git_exclude(false);
        builder.add_custom_ignore_filename(&custom_ignore);
        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
    }

    // A `.gitignore` in a directory that also has a `.git` directory is
    // applied to the walk.
    #[test]
    fn gitignore() {
        let td = tmpdir();
        mkdirp(td.path().join(".git"));
        mkdirp(td.path().join("a"));
        wfile(td.path().join(".gitignore"), "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path()),
            &["bar", "a", "a/bar"],
        );
    }

    // An ignore file added explicitly by path is applied even though its
    // name is not one of the recognized ignore file names.
    #[test]
    fn explicit_ignore() {
        let td = tmpdir();
        let igpath = td.path().join(".not-an-ignore");
        mkdirp(td.path().join("a"));
        wfile(&igpath, "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        assert!(builder.add_ignore(&igpath).is_none());
        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
    }

    // With the standard filters disabled, the explicitly added ignore file
    // still applies, and the ignore file itself now shows up in the results.
    #[test]
    fn explicit_ignore_exclusive_use() {
        let td = tmpdir();
        let igpath = td.path().join(".not-an-ignore");
        mkdirp(td.path().join("a"));
        wfile(&igpath, "foo");
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("bar"), "");
        wfile(td.path().join("a/bar"), "");

        let mut builder = WalkBuilder::new(td.path());
        builder.standard_filters(false);
        assert!(builder.add_ignore(&igpath).is_none());
        assert_paths(
            td.path(),
            &builder,
            &[".not-an-ignore", "bar", "a", "a/bar"],
        );
    }

    // A `.gitignore` in the parent of the walk root applies to the walk.
    #[test]
    fn gitignore_parent() {
        let td = tmpdir();
        mkdirp(td.path().join(".git"));
        mkdirp(td.path().join("a"));
        wfile(td.path().join(".gitignore"), "foo");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("a/bar"), "");

        let root = td.path().join("a");
        assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
    }

    // The walk descends no deeper than `max_depth`. The expectation for
    // depth 0 is empty because the collectors drop the root entry itself.
    #[test]
    fn max_depth() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b/c"));
        wfile(td.path().join("foo"), "");
        wfile(td.path().join("a/foo"), "");
        wfile(td.path().join("a/b/foo"), "");
        wfile(td.path().join("a/b/c/foo"), "");

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(
            td.path(),
            &builder,
            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
        );
        assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
        assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
        assert_paths(
            td.path(),
            builder.max_depth(Some(2)),
            &["a", "a/b", "foo", "a/foo"],
        );
    }

    // Files strictly larger than `max_filesize` are dropped; files exactly
    // at the limit and directories are kept.
    #[test]
    fn max_filesize() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b"));
        wfile_size(td.path().join("foo"), 0);
        wfile_size(td.path().join("bar"), 400);
        wfile_size(td.path().join("baz"), 600);
        wfile_size(td.path().join("a/foo"), 600);
        wfile_size(td.path().join("a/bar"), 500);
        wfile_size(td.path().join("a/baz"), 200);

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(
            td.path(),
            &builder,
            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
        );
        assert_paths(
            td.path(),
            builder.max_filesize(Some(0)),
            &["a", "a/b", "foo"],
        );
        assert_paths(
            td.path(),
            builder.max_filesize(Some(500)),
            &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
        );
        assert_paths(
            td.path(),
            builder.max_filesize(Some(50000)),
            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
        );
    }

    // A symlink to a directory is yielded but not descended into unless
    // `follow_links` is enabled.
    #[cfg(unix)] // because symlinks on windows are weird
    #[test]
    fn symlinks() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b"));
        symlink(td.path().join("a/b"), td.path().join("z"));
        wfile(td.path().join("a/b/foo"), "");

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
        assert_paths(
            td.path(),
            &builder.follow_links(true),
            &["a", "a/b", "a/b/foo", "z", "z/foo"],
        );
    }

    // A root path that is a plain directory must not be reported as a
    // symlink by either walker.
    #[cfg(unix)] // because symlinks on windows are weird
    #[test]
    fn first_path_not_symlink() {
        let td = tmpdir();
        mkdirp(td.path().join("foo"));

        let dents = WalkBuilder::new(td.path().join("foo"))
            .build()
            .into_iter()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(1, dents.len());
        assert!(!dents[0].path_is_symlink());

        let dents = walk_collect_entries_parallel(&WalkBuilder::new(
            td.path().join("foo"),
        ));
        assert_eq!(1, dents.len());
        assert!(!dents[0].path_is_symlink());
    }

    // `a/b/c` links back to `a`. Without following links it is yielded as a
    // plain entry; with `follow_links` it is absent from the successful
    // entries (the collectors drop error results).
    #[cfg(unix)] // because symlinks on windows are weird
    #[test]
    fn symlink_loop() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b"));
        symlink(td.path().join("a"), td.path().join("a/b/c"));

        let mut builder = WalkBuilder::new(td.path());
        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
        assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
    }

    // It's a little tricky to test the 'same_file_system' option since
    // we need an environment with more than one file system. We adopt a
    // heuristic where /sys is typically a distinct volume on Linux and roll
    // with that.
    #[test]
    #[cfg(target_os = "linux")]
    fn same_file_system() {
        use super::device_num;

        // If for some reason /sys doesn't exist or isn't a directory, just
        // skip this test.
        if !Path::new("/sys").is_dir() {
            return;
        }

        // If our test directory actually isn't a different volume from /sys,
        // then this test is meaningless and we shouldn't run it.
        let td = tmpdir();
        if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
            return;
        }

        mkdirp(td.path().join("same_file"));
        symlink("/sys", td.path().join("same_file").join("alink"));

        // Create a symlink to sys and enable following symlinks. If the
        // same_file_system option doesn't work, then this probably will hit a
        // permission error. Otherwise, it should just skip over the symlink
        // completely.
        let mut builder = WalkBuilder::new(td.path());
        builder.follow_links(true).same_file_system(true);
        assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
    }

    // A directory that cannot be read is still yielded as an entry itself.
    #[cfg(target_os = "linux")]
    #[test]
    fn no_read_permissions() {
        let dir_path = Path::new("/root");

        // There's no /root directory, skip the test.
        if !dir_path.is_dir() {
            return;
        }
        // We're the root, so the test won't check what we want it to.
        if fs::read_dir(&dir_path).is_ok() {
            return;
        }

        // Check that we can't descend but get an entry for the parent dir.
        let builder = WalkBuilder::new(&dir_path);
        assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
    }

    // Entries rejected by the `filter_entry` predicate are dropped, and a
    // rejected directory is not descended into.
    #[test]
    fn filter() {
        let td = tmpdir();
        mkdirp(td.path().join("a/b/c"));
        mkdirp(td.path().join("x/y"));
        wfile(td.path().join("a/b/foo"), "");
        wfile(td.path().join("x/y/foo"), "");

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path()),
            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
        );

        assert_paths(
            td.path(),
            &WalkBuilder::new(td.path())
                .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
            &["x", "x/y", "x/y/foo"],
        );
    }
}
2253