1 /*!
2 Crate `walkdir` provides an efficient and cross platform implementation
3 of recursive directory traversal. Several options are exposed to control
4 iteration, such as whether to follow symbolic links (default off), limit the
5 maximum number of simultaneous open file descriptors and the ability to
6 efficiently skip descending into directories.
7 
8 To use this crate, add `walkdir` as a dependency to your project's
9 `Cargo.toml`:
10 
11 ```toml
12 [dependencies]
13 walkdir = "2"
14 ```
15 
16 # From the top
17 
18 The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values
19 yielded by the iterator. Finally, the [`Error`] type is a small wrapper around
20 [`std::io::Error`] with additional information, such as if a loop was detected
21 while following symbolic links (not enabled by default).
22 
23 [`WalkDir`]: struct.WalkDir.html
24 [`DirEntry`]: struct.DirEntry.html
25 [`Error`]: struct.Error.html
26 [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
27 
28 # Example
29 
30 The following code recursively iterates over the directory given and prints
31 the path for each entry:
32 
33 ```no_run
34 use walkdir::WalkDir;
35 # use walkdir::Error;
36 
37 # fn try_main() -> Result<(), Error> {
38 for entry in WalkDir::new("foo") {
39     println!("{}", entry?.path().display());
40 }
41 # Ok(())
42 # }
43 ```
44 
45 Or, if you'd like to iterate over all entries and ignore any errors that
46 may arise, use [`filter_map`]. (e.g., This code below will silently skip
47 directories that the owner of the running process does not have permission to
48 access.)
49 
50 ```no_run
51 use walkdir::WalkDir;
52 
53 for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
54     println!("{}", entry.path().display());
55 }
56 ```
57 
58 [`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map
59 
60 # Example: follow symbolic links
61 
62 The same code as above, except [`follow_links`] is enabled:
63 
64 ```no_run
65 use walkdir::WalkDir;
66 # use walkdir::Error;
67 
68 # fn try_main() -> Result<(), Error> {
69 for entry in WalkDir::new("foo").follow_links(true) {
70     println!("{}", entry?.path().display());
71 }
72 # Ok(())
73 # }
74 ```
75 
76 [`follow_links`]: struct.WalkDir.html#method.follow_links
77 
78 # Example: skip hidden files and directories on unix
79 
80 This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files
81 and directories efficiently (i.e. without recursing into hidden directories):
82 
83 ```no_run
84 use walkdir::{DirEntry, WalkDir};
85 # use walkdir::Error;
86 
87 fn is_hidden(entry: &DirEntry) -> bool {
88     entry.file_name()
89          .to_str()
90          .map(|s| s.starts_with("."))
91          .unwrap_or(false)
92 }
93 
94 # fn try_main() -> Result<(), Error> {
95 let walker = WalkDir::new("foo").into_iter();
96 for entry in walker.filter_entry(|e| !is_hidden(e)) {
97     println!("{}", entry?.path().display());
98 }
99 # Ok(())
100 # }
101 ```
102 
103 [`filter_entry`]: struct.IntoIter.html#method.filter_entry
104 */
105 
106 #![deny(missing_docs)]
107 #![allow(unknown_lints)]
108 
109 #[cfg(doctest)]
110 doc_comment::doctest!("../README.md");
111 
112 use std::cmp::{min, Ordering};
113 use std::fmt;
114 use std::fs::{self, ReadDir};
115 use std::io;
116 use std::path::{Path, PathBuf};
117 use std::result;
118 use std::vec;
119 
120 use same_file::Handle;
121 
122 pub use crate::dent::DirEntry;
123 #[cfg(unix)]
124 pub use crate::dent::DirEntryExt;
125 pub use crate::error::Error;
126 
127 mod dent;
128 mod error;
129 #[cfg(test)]
130 mod tests;
131 mod util;
132 
133 /// Like try, but for iterators that return [`Option<Result<_, _>>`].
134 ///
135 /// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
136 macro_rules! itry {
137     ($e:expr) => {
138         match $e {
139             Ok(v) => v,
140             Err(err) => return Some(Err(From::from(err))),
141         }
142     };
143 }
144 
145 /// A result type for walkdir operations.
146 ///
147 /// Note that this result type embeds the error type in this crate. This
148 /// is only useful if you care about the additional information provided by
149 /// the error (such as the path associated with the error or whether a loop
150 /// was dectected). If you want things to Just Work, then you can use
151 /// [`io::Result`] instead since the error type in this package will
152 /// automatically convert to an [`io::Result`] when using the [`try!`] macro.
153 ///
154 /// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
155 /// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html
156 pub type Result<T> = ::std::result::Result<T, Error>;
157 
158 /// A builder to create an iterator for recursively walking a directory.
159 ///
160 /// Results are returned in depth first fashion, with directories yielded
161 /// before their contents. If [`contents_first`] is true, contents are yielded
162 /// before their directories. The order is unspecified but if [`sort_by`] is
163 /// given, directory entries are sorted according to this function. Directory
164 /// entries `.` and `..` are always omitted.
165 ///
166 /// If an error occurs at any point during iteration, then it is returned in
167 /// place of its corresponding directory entry and iteration continues as
168 /// normal. If an error occurs while opening a directory for reading, then it
169 /// is not descended into (but the error is still yielded by the iterator).
170 /// Iteration may be stopped at any time. When the iterator is destroyed, all
171 /// resources associated with it are freed.
172 ///
173 /// [`contents_first`]: struct.WalkDir.html#method.contents_first
174 /// [`sort_by`]: struct.WalkDir.html#method.sort_by
175 ///
176 /// # Usage
177 ///
178 /// This type implements [`IntoIterator`] so that it may be used as the subject
179 /// of a `for` loop. You may need to call [`into_iter`] explicitly if you want
180 /// to use iterator adapters such as [`filter_entry`].
181 ///
182 /// Idiomatic use of this type should use method chaining to set desired
183 /// options. For example, this only shows entries with a depth of `1`, `2` or
184 /// `3` (relative to `foo`):
185 ///
186 /// ```no_run
187 /// use walkdir::WalkDir;
188 /// # use walkdir::Error;
189 ///
190 /// # fn try_main() -> Result<(), Error> {
191 /// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) {
192 ///     println!("{}", entry?.path().display());
193 /// }
194 /// # Ok(())
195 /// # }
196 /// ```
197 ///
198 /// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html
199 /// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter
200 /// [`filter_entry`]: struct.IntoIter.html#method.filter_entry
201 ///
202 /// Note that the iterator by default includes the top-most directory. Since
203 /// this is the only directory yielded with depth `0`, it is easy to ignore it
204 /// with the [`min_depth`] setting:
205 ///
206 /// ```no_run
207 /// use walkdir::WalkDir;
208 /// # use walkdir::Error;
209 ///
210 /// # fn try_main() -> Result<(), Error> {
211 /// for entry in WalkDir::new("foo").min_depth(1) {
212 ///     println!("{}", entry?.path().display());
213 /// }
214 /// # Ok(())
215 /// # }
216 /// ```
217 ///
218 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
219 ///
220 /// This will only return descendents of the `foo` directory and not `foo`
221 /// itself.
222 ///
223 /// # Loops
224 ///
225 /// This iterator (like most/all recursive directory iterators) assumes that
226 /// no loops can be made with *hard* links on your file system. In particular,
227 /// this would require creating a hard link to a directory such that it creates
228 /// a loop. On most platforms, this operation is illegal.
229 ///
230 /// Note that when following symbolic/soft links, loops are detected and an
231 /// error is reported.
232 #[derive(Debug)]
233 pub struct WalkDir {
234     opts: WalkDirOptions,
235     root: PathBuf,
236 }
237 
238 struct WalkDirOptions {
239     follow_links: bool,
240     max_open: usize,
241     min_depth: usize,
242     max_depth: usize,
243     sorter: Option<
244         Box<
245             dyn FnMut(&DirEntry, &DirEntry) -> Ordering
246                 + Send
247                 + Sync
248                 + 'static,
249         >,
250     >,
251     contents_first: bool,
252     same_file_system: bool,
253 }
254 
255 impl fmt::Debug for WalkDirOptions {
fmt( &self, f: &mut fmt::Formatter<'_>, ) -> result::Result<(), fmt::Error>256     fn fmt(
257         &self,
258         f: &mut fmt::Formatter<'_>,
259     ) -> result::Result<(), fmt::Error> {
260         let sorter_str = if self.sorter.is_some() {
261             // FnMut isn't `Debug`
262             "Some(...)"
263         } else {
264             "None"
265         };
266         f.debug_struct("WalkDirOptions")
267             .field("follow_links", &self.follow_links)
268             .field("max_open", &self.max_open)
269             .field("min_depth", &self.min_depth)
270             .field("max_depth", &self.max_depth)
271             .field("sorter", &sorter_str)
272             .field("contents_first", &self.contents_first)
273             .field("same_file_system", &self.same_file_system)
274             .finish()
275     }
276 }
277 
278 impl WalkDir {
279     /// Create a builder for a recursive directory iterator starting at the
280     /// file path `root`. If `root` is a directory, then it is the first item
281     /// yielded by the iterator. If `root` is a file, then it is the first
282     /// and only item yielded by the iterator. If `root` is a symlink, then it
283     /// is always followed for the purposes of directory traversal. (A root
284     /// `DirEntry` still obeys its documentation with respect to symlinks and
285     /// the `follow_links` setting.)
new<P: AsRef<Path>>(root: P) -> Self286     pub fn new<P: AsRef<Path>>(root: P) -> Self {
287         WalkDir {
288             opts: WalkDirOptions {
289                 follow_links: false,
290                 max_open: 10,
291                 min_depth: 0,
292                 max_depth: ::std::usize::MAX,
293                 sorter: None,
294                 contents_first: false,
295                 same_file_system: false,
296             },
297             root: root.as_ref().to_path_buf(),
298         }
299     }
300 
301     /// Set the minimum depth of entries yielded by the iterator.
302     ///
303     /// The smallest depth is `0` and always corresponds to the path given
304     /// to the `new` function on this type. Its direct descendents have depth
305     /// `1`, and their descendents have depth `2`, and so on.
min_depth(mut self, depth: usize) -> Self306     pub fn min_depth(mut self, depth: usize) -> Self {
307         self.opts.min_depth = depth;
308         if self.opts.min_depth > self.opts.max_depth {
309             self.opts.min_depth = self.opts.max_depth;
310         }
311         self
312     }
313 
314     /// Set the maximum depth of entries yield by the iterator.
315     ///
316     /// The smallest depth is `0` and always corresponds to the path given
317     /// to the `new` function on this type. Its direct descendents have depth
318     /// `1`, and their descendents have depth `2`, and so on.
319     ///
320     /// Note that this will not simply filter the entries of the iterator, but
321     /// it will actually avoid descending into directories when the depth is
322     /// exceeded.
max_depth(mut self, depth: usize) -> Self323     pub fn max_depth(mut self, depth: usize) -> Self {
324         self.opts.max_depth = depth;
325         if self.opts.max_depth < self.opts.min_depth {
326             self.opts.max_depth = self.opts.min_depth;
327         }
328         self
329     }
330 
331     /// Follow symbolic links. By default, this is disabled.
332     ///
333     /// When `yes` is `true`, symbolic links are followed as if they were
334     /// normal directories and files. If a symbolic link is broken or is
335     /// involved in a loop, an error is yielded.
336     ///
337     /// When enabled, the yielded [`DirEntry`] values represent the target of
338     /// the link while the path corresponds to the link. See the [`DirEntry`]
339     /// type for more details.
340     ///
341     /// [`DirEntry`]: struct.DirEntry.html
follow_links(mut self, yes: bool) -> Self342     pub fn follow_links(mut self, yes: bool) -> Self {
343         self.opts.follow_links = yes;
344         self
345     }
346 
347     /// Set the maximum number of simultaneously open file descriptors used
348     /// by the iterator.
349     ///
350     /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
351     /// to `1` automatically. If this is not set, then it defaults to some
352     /// reasonably low number.
353     ///
354     /// This setting has no impact on the results yielded by the iterator
355     /// (even when `n` is `1`). Instead, this setting represents a trade off
356     /// between scarce resources (file descriptors) and memory. Namely, when
357     /// the maximum number of file descriptors is reached and a new directory
358     /// needs to be opened to continue iteration, then a previous directory
359     /// handle is closed and has its unyielded entries stored in memory. In
360     /// practice, this is a satisfying trade off because it scales with respect
361     /// to the *depth* of your file tree. Therefore, low values (even `1`) are
362     /// acceptable.
363     ///
364     /// Note that this value does not impact the number of system calls made by
365     /// an exhausted iterator.
366     ///
367     /// # Platform behavior
368     ///
369     /// On Windows, if `follow_links` is enabled, then this limit is not
370     /// respected. In particular, the maximum number of file descriptors opened
371     /// is proportional to the depth of the directory tree traversed.
max_open(mut self, mut n: usize) -> Self372     pub fn max_open(mut self, mut n: usize) -> Self {
373         if n == 0 {
374             n = 1;
375         }
376         self.opts.max_open = n;
377         self
378     }
379 
380     /// Set a function for sorting directory entries with a comparator
381     /// function.
382     ///
383     /// If a compare function is set, the resulting iterator will return all
384     /// paths in sorted order. The compare function will be called to compare
385     /// entries from the same directory.
386     ///
387     /// ```rust,no_run
388     /// use std::cmp;
389     /// use std::ffi::OsString;
390     /// use walkdir::WalkDir;
391     ///
392     /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name()));
393     /// ```
sort_by<F>(mut self, cmp: F) -> Self where F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,394     pub fn sort_by<F>(mut self, cmp: F) -> Self
395     where
396         F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,
397     {
398         self.opts.sorter = Some(Box::new(cmp));
399         self
400     }
401 
402     /// Set a function for sorting directory entries with a key extraction
403     /// function.
404     ///
405     /// If a compare function is set, the resulting iterator will return all
406     /// paths in sorted order. The compare function will be called to compare
407     /// entries from the same directory.
408     ///
409     /// ```rust,no_run
410     /// use std::cmp;
411     /// use std::ffi::OsString;
412     /// use walkdir::WalkDir;
413     ///
414     /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned());
415     /// ```
sort_by_key<K, F>(self, mut cmp: F) -> Self where F: FnMut(&DirEntry) -> K + Send + Sync + 'static, K: Ord,416     pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self
417     where
418         F: FnMut(&DirEntry) -> K + Send + Sync + 'static,
419         K: Ord,
420     {
421         self.sort_by(move |a, b| cmp(a).cmp(&cmp(b)))
422     }
423 
424     /// Sort directory entries by file name, to ensure a deterministic order.
425     ///
426     /// This is a convenience function for calling `Self::sort_by()`.
427     ///
428     /// ```rust,no_run
429     /// use walkdir::WalkDir;
430     ///
431     /// WalkDir::new("foo").sort_by_file_name();
432     /// ```
sort_by_file_name(self) -> Self433     pub fn sort_by_file_name(self) -> Self {
434         self.sort_by(|a, b| a.file_name().cmp(b.file_name()))
435     }
436 
437     /// Yield a directory's contents before the directory itself. By default,
438     /// this is disabled.
439     ///
440     /// When `yes` is `false` (as is the default), the directory is yielded
441     /// before its contents are read. This is useful when, e.g. you want to
442     /// skip processing of some directories.
443     ///
444     /// When `yes` is `true`, the iterator yields the contents of a directory
445     /// before yielding the directory itself. This is useful when, e.g. you
446     /// want to recursively delete a directory.
447     ///
448     /// # Example
449     ///
450     /// Assume the following directory tree:
451     ///
452     /// ```text
453     /// foo/
454     ///   abc/
455     ///     qrs
456     ///     tuv
457     ///   def/
458     /// ```
459     ///
460     /// With contents_first disabled (the default), the following code visits
461     /// the directory tree in depth-first order:
462     ///
463     /// ```no_run
464     /// use walkdir::WalkDir;
465     ///
466     /// for entry in WalkDir::new("foo") {
467     ///     let entry = entry.unwrap();
468     ///     println!("{}", entry.path().display());
469     /// }
470     ///
471     /// // foo
472     /// // foo/abc
473     /// // foo/abc/qrs
474     /// // foo/abc/tuv
475     /// // foo/def
476     /// ```
477     ///
478     /// With contents_first enabled:
479     ///
480     /// ```no_run
481     /// use walkdir::WalkDir;
482     ///
483     /// for entry in WalkDir::new("foo").contents_first(true) {
484     ///     let entry = entry.unwrap();
485     ///     println!("{}", entry.path().display());
486     /// }
487     ///
488     /// // foo/abc/qrs
489     /// // foo/abc/tuv
490     /// // foo/abc
491     /// // foo/def
492     /// // foo
493     /// ```
contents_first(mut self, yes: bool) -> Self494     pub fn contents_first(mut self, yes: bool) -> Self {
495         self.opts.contents_first = yes;
496         self
497     }
498 
499     /// Do not cross file system boundaries.
500     ///
501     /// When this option is enabled, directory traversal will not descend into
502     /// directories that are on a different file system from the root path.
503     ///
504     /// Currently, this option is only supported on Unix and Windows. If this
505     /// option is used on an unsupported platform, then directory traversal
506     /// will immediately return an error and will not yield any entries.
same_file_system(mut self, yes: bool) -> Self507     pub fn same_file_system(mut self, yes: bool) -> Self {
508         self.opts.same_file_system = yes;
509         self
510     }
511 }
512 
513 impl IntoIterator for WalkDir {
514     type Item = Result<DirEntry>;
515     type IntoIter = IntoIter;
516 
into_iter(self) -> IntoIter517     fn into_iter(self) -> IntoIter {
518         IntoIter {
519             opts: self.opts,
520             start: Some(self.root),
521             stack_list: vec![],
522             stack_path: vec![],
523             oldest_opened: 0,
524             depth: 0,
525             deferred_dirs: vec![],
526             root_device: None,
527         }
528     }
529 }
530 
531 /// An iterator for recursively descending into a directory.
532 ///
533 /// A value with this type must be constructed with the [`WalkDir`] type, which
534 /// uses a builder pattern to set options such as min/max depth, max open file
535 /// descriptors and whether the iterator should follow symbolic links. After
536 /// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain.
537 ///
538 /// The order of elements yielded by this iterator is unspecified.
539 ///
540 /// [`WalkDir`]: struct.WalkDir.html
541 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
542 #[derive(Debug)]
543 pub struct IntoIter {
544     /// Options specified in the builder. Depths, max fds, etc.
545     opts: WalkDirOptions,
546     /// The start path.
547     ///
548     /// This is only `Some(...)` at the beginning. After the first iteration,
549     /// this is always `None`.
550     start: Option<PathBuf>,
551     /// A stack of open (up to max fd) or closed handles to directories.
552     /// An open handle is a plain [`fs::ReadDir`] while a closed handle is
553     /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries.
554     ///
555     /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
556     stack_list: Vec<DirList>,
557     /// A stack of file paths.
558     ///
559     /// This is *only* used when [`follow_links`] is enabled. In all other
560     /// cases this stack is empty.
561     ///
562     /// [`follow_links`]: struct.WalkDir.html#method.follow_links
563     stack_path: Vec<Ancestor>,
564     /// An index into `stack_list` that points to the oldest open directory
565     /// handle. If the maximum fd limit is reached and a new directory needs to
566     /// be read, the handle at this index is closed before the new directory is
567     /// opened.
568     oldest_opened: usize,
569     /// The current depth of iteration (the length of the stack at the
570     /// beginning of each iteration).
571     depth: usize,
572     /// A list of DirEntries corresponding to directories, that are
573     /// yielded after their contents has been fully yielded. This is only
574     /// used when `contents_first` is enabled.
575     deferred_dirs: Vec<DirEntry>,
576     /// The device of the root file path when the first call to `next` was
577     /// made.
578     ///
579     /// If the `same_file_system` option isn't enabled, then this is always
580     /// `None`. Conversely, if it is enabled, this is always `Some(...)` after
581     /// handling the root path.
582     root_device: Option<u64>,
583 }
584 
585 /// An ancestor is an item in the directory tree traversed by walkdir, and is
586 /// used to check for loops in the tree when traversing symlinks.
587 #[derive(Debug)]
588 struct Ancestor {
589     /// The path of this ancestor.
590     path: PathBuf,
591     /// An open file to this ancesor. This is only used on Windows where
592     /// opening a file handle appears to be quite expensive, so we choose to
593     /// cache it. This comes at the cost of not respecting the file descriptor
594     /// limit set by the user.
595     #[cfg(windows)]
596     handle: Handle,
597 }
598 
599 impl Ancestor {
600     /// Create a new ancestor from the given directory path.
601     #[cfg(windows)]
new(dent: &DirEntry) -> io::Result<Ancestor>602     fn new(dent: &DirEntry) -> io::Result<Ancestor> {
603         let handle = Handle::from_path(dent.path())?;
604         Ok(Ancestor { path: dent.path().to_path_buf(), handle: handle })
605     }
606 
607     /// Create a new ancestor from the given directory path.
608     #[cfg(not(windows))]
new(dent: &DirEntry) -> io::Result<Ancestor>609     fn new(dent: &DirEntry) -> io::Result<Ancestor> {
610         Ok(Ancestor { path: dent.path().to_path_buf() })
611     }
612 
613     /// Returns true if and only if the given open file handle corresponds to
614     /// the same directory as this ancestor.
615     #[cfg(windows)]
is_same(&self, child: &Handle) -> io::Result<bool>616     fn is_same(&self, child: &Handle) -> io::Result<bool> {
617         Ok(child == &self.handle)
618     }
619 
620     /// Returns true if and only if the given open file handle corresponds to
621     /// the same directory as this ancestor.
622     #[cfg(not(windows))]
is_same(&self, child: &Handle) -> io::Result<bool>623     fn is_same(&self, child: &Handle) -> io::Result<bool> {
624         Ok(child == &Handle::from_path(&self.path)?)
625     }
626 }
627 
628 /// A sequence of unconsumed directory entries.
629 ///
630 /// This represents the opened or closed state of a directory handle. When
631 /// open, future entries are read by iterating over the raw `fs::ReadDir`.
632 /// When closed, all future entries are read into memory. Iteration then
633 /// proceeds over a [`Vec<fs::DirEntry>`].
634 ///
635 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
636 /// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html
637 #[derive(Debug)]
638 enum DirList {
639     /// An opened handle.
640     ///
641     /// This includes the depth of the handle itself.
642     ///
643     /// If there was an error with the initial [`fs::read_dir`] call, then it
644     /// is stored here. (We use an [`Option<...>`] to make yielding the error
645     /// exactly once simpler.)
646     ///
647     /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
648     /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
649     Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> },
650     /// A closed handle.
651     ///
652     /// All remaining directory entries are read into memory.
653     Closed(vec::IntoIter<Result<DirEntry>>),
654 }
655 
656 impl Iterator for IntoIter {
657     type Item = Result<DirEntry>;
658     /// Advances the iterator and returns the next value.
659     ///
660     /// # Errors
661     ///
662     /// If the iterator fails to retrieve the next value, this method returns
663     /// an error value. The error will be wrapped in an Option::Some.
next(&mut self) -> Option<Result<DirEntry>>664     fn next(&mut self) -> Option<Result<DirEntry>> {
665         if let Some(start) = self.start.take() {
666             if self.opts.same_file_system {
667                 let result = util::device_num(&start)
668                     .map_err(|e| Error::from_path(0, start.clone(), e));
669                 self.root_device = Some(itry!(result));
670             }
671             let dent = itry!(DirEntry::from_path(0, start, false));
672             if let Some(result) = self.handle_entry(dent) {
673                 return Some(result);
674             }
675         }
676         while !self.stack_list.is_empty() {
677             self.depth = self.stack_list.len();
678             if let Some(dentry) = self.get_deferred_dir() {
679                 return Some(Ok(dentry));
680             }
681             if self.depth > self.opts.max_depth {
682                 // If we've exceeded the max depth, pop the current dir
683                 // so that we don't descend.
684                 self.pop();
685                 continue;
686             }
687             // Unwrap is safe here because we've verified above that
688             // `self.stack_list` is not empty
689             let next = self
690                 .stack_list
691                 .last_mut()
692                 .expect("BUG: stack should be non-empty")
693                 .next();
694             match next {
695                 None => self.pop(),
696                 Some(Err(err)) => return Some(Err(err)),
697                 Some(Ok(dent)) => {
698                     if let Some(result) = self.handle_entry(dent) {
699                         return Some(result);
700                     }
701                 }
702             }
703         }
704         if self.opts.contents_first {
705             self.depth = self.stack_list.len();
706             if let Some(dentry) = self.get_deferred_dir() {
707                 return Some(Ok(dentry));
708             }
709         }
710         None
711     }
712 }
713 
714 impl IntoIter {
715     /// Skips the current directory.
716     ///
717     /// This causes the iterator to stop traversing the contents of the least
718     /// recently yielded directory. This means any remaining entries in that
719     /// directory will be skipped (including sub-directories).
720     ///
721     /// Note that the ergonomics of this method are questionable since it
722     /// borrows the iterator mutably. Namely, you must write out the looping
723     /// condition manually. For example, to skip hidden entries efficiently on
724     /// unix systems:
725     ///
726     /// ```no_run
727     /// use walkdir::{DirEntry, WalkDir};
728     ///
729     /// fn is_hidden(entry: &DirEntry) -> bool {
730     ///     entry.file_name()
731     ///          .to_str()
732     ///          .map(|s| s.starts_with("."))
733     ///          .unwrap_or(false)
734     /// }
735     ///
736     /// let mut it = WalkDir::new("foo").into_iter();
737     /// loop {
738     ///     let entry = match it.next() {
739     ///         None => break,
740     ///         Some(Err(err)) => panic!("ERROR: {}", err),
741     ///         Some(Ok(entry)) => entry,
742     ///     };
743     ///     if is_hidden(&entry) {
744     ///         if entry.file_type().is_dir() {
745     ///             it.skip_current_dir();
746     ///         }
747     ///         continue;
748     ///     }
749     ///     println!("{}", entry.path().display());
750     /// }
751     /// ```
752     ///
753     /// You may find it more convenient to use the [`filter_entry`] iterator
754     /// adapter. (See its documentation for the same example functionality as
755     /// above.)
756     ///
757     /// [`filter_entry`]: #method.filter_entry
skip_current_dir(&mut self)758     pub fn skip_current_dir(&mut self) {
759         if !self.stack_list.is_empty() {
760             self.pop();
761         }
762     }
763 
764     /// Yields only entries which satisfy the given predicate and skips
765     /// descending into directories that do not satisfy the given predicate.
766     ///
767     /// The predicate is applied to all entries. If the predicate is
768     /// true, iteration carries on as normal. If the predicate is false, the
769     /// entry is ignored and if it is a directory, it is not descended into.
770     ///
771     /// This is often more convenient to use than [`skip_current_dir`]. For
772     /// example, to skip hidden files and directories efficiently on unix
773     /// systems:
774     ///
775     /// ```no_run
776     /// use walkdir::{DirEntry, WalkDir};
777     /// # use walkdir::Error;
778     ///
779     /// fn is_hidden(entry: &DirEntry) -> bool {
780     ///     entry.file_name()
781     ///          .to_str()
782     ///          .map(|s| s.starts_with("."))
783     ///          .unwrap_or(false)
784     /// }
785     ///
786     /// # fn try_main() -> Result<(), Error> {
787     /// for entry in WalkDir::new("foo")
788     ///                      .into_iter()
789     ///                      .filter_entry(|e| !is_hidden(e)) {
790     ///     println!("{}", entry?.path().display());
791     /// }
792     /// # Ok(())
793     /// # }
794     /// ```
795     ///
796     /// Note that the iterator will still yield errors for reading entries that
797     /// may not satisfy the predicate.
798     ///
799     /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
800     /// passed to this predicate.
801     ///
802     /// Note that if the iterator has `contents_first` enabled, then this
803     /// method is no different than calling the standard `Iterator::filter`
804     /// method (because directory entries are yielded after they've been
805     /// descended into).
806     ///
807     /// [`skip_current_dir`]: #method.skip_current_dir
808     /// [`min_depth`]: struct.WalkDir.html#method.min_depth
809     /// [`max_depth`]: struct.WalkDir.html#method.max_depth
filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> where P: FnMut(&DirEntry) -> bool,810     pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
811     where
812         P: FnMut(&DirEntry) -> bool,
813     {
814         FilterEntry { it: self, predicate: predicate }
815     }
816 
handle_entry( &mut self, mut dent: DirEntry, ) -> Option<Result<DirEntry>>817     fn handle_entry(
818         &mut self,
819         mut dent: DirEntry,
820     ) -> Option<Result<DirEntry>> {
821         if self.opts.follow_links && dent.file_type().is_symlink() {
822             dent = itry!(self.follow(dent));
823         }
824         let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
825         if is_normal_dir {
826             if self.opts.same_file_system && dent.depth() > 0 {
827                 if itry!(self.is_same_file_system(&dent)) {
828                     itry!(self.push(&dent));
829                 }
830             } else {
831                 itry!(self.push(&dent));
832             }
833         } else if dent.depth() == 0 && dent.file_type().is_symlink() {
834             // As a special case, if we are processing a root entry, then we
835             // always follow it even if it's a symlink and follow_links is
836             // false. We are careful to not let this change the semantics of
837             // the DirEntry however. Namely, the DirEntry should still respect
838             // the follow_links setting. When it's disabled, it should report
839             // itself as a symlink. When it's enabled, it should always report
840             // itself as the target.
841             let md = itry!(fs::metadata(dent.path()).map_err(|err| {
842                 Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
843             }));
844             if md.file_type().is_dir() {
845                 itry!(self.push(&dent));
846             }
847         }
848         if is_normal_dir && self.opts.contents_first {
849             self.deferred_dirs.push(dent);
850             None
851         } else if self.skippable() {
852             None
853         } else {
854             Some(Ok(dent))
855         }
856     }
857 
get_deferred_dir(&mut self) -> Option<DirEntry>858     fn get_deferred_dir(&mut self) -> Option<DirEntry> {
859         if self.opts.contents_first {
860             if self.depth < self.deferred_dirs.len() {
861                 // Unwrap is safe here because we've guaranteed that
862                 // `self.deferred_dirs.len()` can never be less than 1
863                 let deferred: DirEntry = self
864                     .deferred_dirs
865                     .pop()
866                     .expect("BUG: deferred_dirs should be non-empty");
867                 if !self.skippable() {
868                     return Some(deferred);
869                 }
870             }
871         }
872         None
873     }
874 
push(&mut self, dent: &DirEntry) -> Result<()>875     fn push(&mut self, dent: &DirEntry) -> Result<()> {
876         // Make room for another open file descriptor if we've hit the max.
877         let free =
878             self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
879         if free == self.opts.max_open {
880             self.stack_list[self.oldest_opened].close();
881         }
882         // Open a handle to reading the directory's entries.
883         let rd = fs::read_dir(dent.path()).map_err(|err| {
884             Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
885         });
886         let mut list = DirList::Opened { depth: self.depth, it: rd };
887         if let Some(ref mut cmp) = self.opts.sorter {
888             let mut entries: Vec<_> = list.collect();
889             entries.sort_by(|a, b| match (a, b) {
890                 (&Ok(ref a), &Ok(ref b)) => cmp(a, b),
891                 (&Err(_), &Err(_)) => Ordering::Equal,
892                 (&Ok(_), &Err(_)) => Ordering::Greater,
893                 (&Err(_), &Ok(_)) => Ordering::Less,
894             });
895             list = DirList::Closed(entries.into_iter());
896         }
897         if self.opts.follow_links {
898             let ancestor = Ancestor::new(&dent)
899                 .map_err(|err| Error::from_io(self.depth, err))?;
900             self.stack_path.push(ancestor);
901         }
902         // We push this after stack_path since creating the Ancestor can fail.
903         // If it fails, then we return the error and won't descend.
904         self.stack_list.push(list);
905         // If we had to close out a previous directory stream, then we need to
906         // increment our index the oldest still-open stream. We do this only
907         // after adding to our stack, in order to ensure that the oldest_opened
908         // index remains valid. The worst that can happen is that an already
909         // closed stream will be closed again, which is a no-op.
910         //
911         // We could move the close of the stream above into this if-body, but
912         // then we would have more than the maximum number of file descriptors
913         // open at a particular point in time.
914         if free == self.opts.max_open {
915             // Unwrap is safe here because self.oldest_opened is guaranteed to
916             // never be greater than `self.stack_list.len()`, which implies
917             // that the subtraction won't underflow and that adding 1 will
918             // never overflow.
919             self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
920         }
921         Ok(())
922     }
923 
pop(&mut self)924     fn pop(&mut self) {
925         self.stack_list.pop().expect("BUG: cannot pop from empty stack");
926         if self.opts.follow_links {
927             self.stack_path.pop().expect("BUG: list/path stacks out of sync");
928         }
929         // If everything in the stack is already closed, then there is
930         // room for at least one more open descriptor and it will
931         // always be at the top of the stack.
932         self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
933     }
934 
follow(&self, mut dent: DirEntry) -> Result<DirEntry>935     fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
936         dent =
937             DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
938         // The only way a symlink can cause a loop is if it points
939         // to a directory. Otherwise, it always points to a leaf
940         // and we can omit any loop checks.
941         if dent.is_dir() {
942             self.check_loop(dent.path())?;
943         }
944         Ok(dent)
945     }
946 
check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()>947     fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
948         let hchild = Handle::from_path(&child)
949             .map_err(|err| Error::from_io(self.depth, err))?;
950         for ancestor in self.stack_path.iter().rev() {
951             let is_same = ancestor
952                 .is_same(&hchild)
953                 .map_err(|err| Error::from_io(self.depth, err))?;
954             if is_same {
955                 return Err(Error::from_loop(
956                     self.depth,
957                     &ancestor.path,
958                     child.as_ref(),
959                 ));
960             }
961         }
962         Ok(())
963     }
964 
is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool>965     fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
966         let dent_device = util::device_num(dent.path())
967             .map_err(|err| Error::from_entry(dent, err))?;
968         Ok(self
969             .root_device
970             .map(|d| d == dent_device)
971             .expect("BUG: called is_same_file_system without root device"))
972     }
973 
skippable(&self) -> bool974     fn skippable(&self) -> bool {
975         self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
976     }
977 }
978 
979 impl DirList {
close(&mut self)980     fn close(&mut self) {
981         if let DirList::Opened { .. } = *self {
982             *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
983         }
984     }
985 }
986 
987 impl Iterator for DirList {
988     type Item = Result<DirEntry>;
989 
990     #[inline(always)]
next(&mut self) -> Option<Result<DirEntry>>991     fn next(&mut self) -> Option<Result<DirEntry>> {
992         match *self {
993             DirList::Closed(ref mut it) => it.next(),
994             DirList::Opened { depth, ref mut it } => match *it {
995                 Err(ref mut err) => err.take().map(Err),
996                 Ok(ref mut rd) => rd.next().map(|r| match r {
997                     Ok(r) => DirEntry::from_entry(depth + 1, &r),
998                     Err(err) => Err(Error::from_io(depth + 1, err)),
999                 }),
1000             },
1001         }
1002     }
1003 }
1004 
1005 /// A recursive directory iterator that skips entries.
1006 ///
1007 /// Values of this type are created by calling [`.filter_entry()`] on an
1008 /// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
1009 ///
1010 /// Directories that fail the predicate `P` are skipped. Namely, they are
1011 /// never yielded and never descended into.
1012 ///
1013 /// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
1014 /// are not passed through this filter.
1015 ///
1016 /// If opening a handle to a directory resulted in an error, then it is yielded
1017 /// and no corresponding call to the predicate is made.
1018 ///
1019 /// Type parameter `I` refers to the underlying iterator and `P` refers to the
1020 /// predicate, which is usually `FnMut(&DirEntry) -> bool`.
1021 ///
1022 /// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
1023 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
1024 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1025 /// [`max_depth`]: struct.WalkDir.html#method.max_depth
1026 #[derive(Debug)]
1027 pub struct FilterEntry<I, P> {
1028     it: I,
1029     predicate: P,
1030 }
1031 
1032 impl<P> Iterator for FilterEntry<IntoIter, P>
1033 where
1034     P: FnMut(&DirEntry) -> bool,
1035 {
1036     type Item = Result<DirEntry>;
1037 
1038     /// Advances the iterator and returns the next value.
1039     ///
1040     /// # Errors
1041     ///
1042     /// If the iterator fails to retrieve the next value, this method returns
1043     /// an error value. The error will be wrapped in an `Option::Some`.
next(&mut self) -> Option<Result<DirEntry>>1044     fn next(&mut self) -> Option<Result<DirEntry>> {
1045         loop {
1046             let dent = match self.it.next() {
1047                 None => return None,
1048                 Some(result) => itry!(result),
1049             };
1050             if !(self.predicate)(&dent) {
1051                 if dent.is_dir() {
1052                     self.it.skip_current_dir();
1053                 }
1054                 continue;
1055             }
1056             return Some(Ok(dent));
1057         }
1058     }
1059 }
1060 
1061 impl<P> FilterEntry<IntoIter, P>
1062 where
1063     P: FnMut(&DirEntry) -> bool,
1064 {
1065     /// Yields only entries which satisfy the given predicate and skips
1066     /// descending into directories that do not satisfy the given predicate.
1067     ///
1068     /// The predicate is applied to all entries. If the predicate is
1069     /// true, iteration carries on as normal. If the predicate is false, the
1070     /// entry is ignored and if it is a directory, it is not descended into.
1071     ///
1072     /// This is often more convenient to use than [`skip_current_dir`]. For
1073     /// example, to skip hidden files and directories efficiently on unix
1074     /// systems:
1075     ///
1076     /// ```no_run
1077     /// use walkdir::{DirEntry, WalkDir};
1078     /// # use walkdir::Error;
1079     ///
1080     /// fn is_hidden(entry: &DirEntry) -> bool {
1081     ///     entry.file_name()
1082     ///          .to_str()
1083     ///          .map(|s| s.starts_with("."))
1084     ///          .unwrap_or(false)
1085     /// }
1086     ///
1087     /// # fn try_main() -> Result<(), Error> {
1088     /// for entry in WalkDir::new("foo")
1089     ///                      .into_iter()
1090     ///                      .filter_entry(|e| !is_hidden(e)) {
1091     ///     println!("{}", entry?.path().display());
1092     /// }
1093     /// # Ok(())
1094     /// # }
1095     /// ```
1096     ///
1097     /// Note that the iterator will still yield errors for reading entries that
1098     /// may not satisfy the predicate.
1099     ///
1100     /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
1101     /// passed to this predicate.
1102     ///
1103     /// Note that if the iterator has `contents_first` enabled, then this
1104     /// method is no different than calling the standard `Iterator::filter`
1105     /// method (because directory entries are yielded after they've been
1106     /// descended into).
1107     ///
1108     /// [`skip_current_dir`]: #method.skip_current_dir
1109     /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1110     /// [`max_depth`]: struct.WalkDir.html#method.max_depth
filter_entry(self, predicate: P) -> FilterEntry<Self, P>1111     pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
1112         FilterEntry { it: self, predicate: predicate }
1113     }
1114 
1115     /// Skips the current directory.
1116     ///
1117     /// This causes the iterator to stop traversing the contents of the least
1118     /// recently yielded directory. This means any remaining entries in that
1119     /// directory will be skipped (including sub-directories).
1120     ///
1121     /// Note that the ergonomics of this method are questionable since it
1122     /// borrows the iterator mutably. Namely, you must write out the looping
1123     /// condition manually. For example, to skip hidden entries efficiently on
1124     /// unix systems:
1125     ///
1126     /// ```no_run
1127     /// use walkdir::{DirEntry, WalkDir};
1128     ///
1129     /// fn is_hidden(entry: &DirEntry) -> bool {
1130     ///     entry.file_name()
1131     ///          .to_str()
1132     ///          .map(|s| s.starts_with("."))
1133     ///          .unwrap_or(false)
1134     /// }
1135     ///
1136     /// let mut it = WalkDir::new("foo").into_iter();
1137     /// loop {
1138     ///     let entry = match it.next() {
1139     ///         None => break,
1140     ///         Some(Err(err)) => panic!("ERROR: {}", err),
1141     ///         Some(Ok(entry)) => entry,
1142     ///     };
1143     ///     if is_hidden(&entry) {
1144     ///         if entry.file_type().is_dir() {
1145     ///             it.skip_current_dir();
1146     ///         }
1147     ///         continue;
1148     ///     }
1149     ///     println!("{}", entry.path().display());
1150     /// }
1151     /// ```
1152     ///
1153     /// You may find it more convenient to use the [`filter_entry`] iterator
1154     /// adapter. (See its documentation for the same example functionality as
1155     /// above.)
1156     ///
1157     /// [`filter_entry`]: #method.filter_entry
skip_current_dir(&mut self)1158     pub fn skip_current_dir(&mut self) {
1159         self.it.skip_current_dir();
1160     }
1161 }
1162