1 /*!
2 The ignore crate provides a fast recursive directory iterator that respects
3 various filters such as globs, file types and `.gitignore` files. The precise
matching rules and precedence are explained in the documentation for
5 `WalkBuilder`.
6 
7 Secondarily, this crate exposes gitignore and file type matchers for use cases
8 that demand more fine-grained control.
9 
10 # Example
11 
12 This example shows the most basic usage of this crate. This code will
13 recursively traverse the current directory while automatically filtering out
14 files and directories according to ignore globs found in files like
15 `.ignore` and `.gitignore`:
16 
17 
18 ```rust,no_run
19 use ignore::Walk;
20 
21 for result in Walk::new("./") {
22     // Each item yielded by the iterator is either a directory entry or an
23     // error, so either print the path or the error.
24     match result {
25         Ok(entry) => println!("{}", entry.path().display()),
26         Err(err) => println!("ERROR: {}", err),
27     }
28 }
29 ```
30 
31 # Example: advanced
32 
33 By default, the recursive directory iterator will ignore hidden files and
34 directories. This can be disabled by building the iterator with `WalkBuilder`:
35 
36 ```rust,no_run
37 use ignore::WalkBuilder;
38 
39 for result in WalkBuilder::new("./").hidden(false).build() {
40     println!("{:?}", result);
41 }
42 ```
43 
44 See the documentation for `WalkBuilder` for many other options.
45 */
46 
47 #![deny(missing_docs)]
48 
49 use std::error;
50 use std::fmt;
51 use std::io;
52 use std::path::{Path, PathBuf};
53 
54 pub use crate::walk::{
55     DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder,
56     WalkParallel, WalkState,
57 };
58 
59 mod default_types;
60 mod dir;
61 pub mod gitignore;
62 pub mod overrides;
63 mod pathutil;
64 pub mod types;
65 mod walk;
66 
/// Represents an error that can occur when parsing ignore files, compiling
/// globs, selecting file types or recursively walking a directory.
///
/// The `WithLineNumber`, `WithPath` and `WithDepth` variants wrap another
/// error and attach context to it, so a single failure may be nested several
/// levels deep. `Partial` groups several errors together.
#[derive(Debug)]
pub enum Error {
    /// A collection of "soft" errors. These occur when adding an ignore
    /// file partially succeeded.
    Partial(Vec<Error>),
    /// An error associated with a specific line number.
    WithLineNumber {
        /// The line number.
        line: u64,
        /// The underlying error.
        err: Box<Error>,
    },
    /// An error associated with a particular file path.
    WithPath {
        /// The file path.
        path: PathBuf,
        /// The underlying error.
        err: Box<Error>,
    },
    /// An error associated with a particular directory depth when recursively
    /// walking a directory.
    WithDepth {
        /// The directory depth.
        depth: usize,
        /// The underlying error.
        err: Box<Error>,
    },
    /// An error that occurs when a file loop is detected when traversing
    /// symbolic links.
    Loop {
        /// The ancestor file path in the loop.
        ancestor: PathBuf,
        /// The child file path in the loop.
        child: PathBuf,
    },
    /// An error that occurs when doing I/O, such as reading an ignore file.
    Io(io::Error),
    /// An error that occurs when trying to parse a glob.
    Glob {
        /// The original glob that caused this error. This glob, when
        /// available, always corresponds to the glob provided by an end user.
        /// e.g., It is the glob as written in a `.gitignore` file.
        ///
        /// (This glob may be distinct from the glob that is actually
        /// compiled, after accounting for `gitignore` semantics.)
        glob: Option<String>,
        /// The underlying glob error as a string.
        err: String,
    },
    /// A type selection for a file type that is not defined.
    UnrecognizedFileType(String),
    /// A user specified file type definition could not be parsed.
    InvalidDefinition,
}
122 
123 impl Clone for Error {
clone(&self) -> Error124     fn clone(&self) -> Error {
125         match *self {
126             Error::Partial(ref errs) => Error::Partial(errs.clone()),
127             Error::WithLineNumber { line, ref err } => {
128                 Error::WithLineNumber { line: line, err: err.clone() }
129             }
130             Error::WithPath { ref path, ref err } => {
131                 Error::WithPath { path: path.clone(), err: err.clone() }
132             }
133             Error::WithDepth { depth, ref err } => {
134                 Error::WithDepth { depth: depth, err: err.clone() }
135             }
136             Error::Loop { ref ancestor, ref child } => Error::Loop {
137                 ancestor: ancestor.clone(),
138                 child: child.clone(),
139             },
140             Error::Io(ref err) => match err.raw_os_error() {
141                 Some(e) => Error::Io(io::Error::from_raw_os_error(e)),
142                 None => Error::Io(io::Error::new(err.kind(), err.to_string())),
143             },
144             Error::Glob { ref glob, ref err } => {
145                 Error::Glob { glob: glob.clone(), err: err.clone() }
146             }
147             Error::UnrecognizedFileType(ref err) => {
148                 Error::UnrecognizedFileType(err.clone())
149             }
150             Error::InvalidDefinition => Error::InvalidDefinition,
151         }
152     }
153 }
154 
155 impl Error {
156     /// Returns true if this is a partial error.
157     ///
158     /// A partial error occurs when only some operations failed while others
159     /// may have succeeded. For example, an ignore file may contain an invalid
160     /// glob among otherwise valid globs.
is_partial(&self) -> bool161     pub fn is_partial(&self) -> bool {
162         match *self {
163             Error::Partial(_) => true,
164             Error::WithLineNumber { ref err, .. } => err.is_partial(),
165             Error::WithPath { ref err, .. } => err.is_partial(),
166             Error::WithDepth { ref err, .. } => err.is_partial(),
167             _ => false,
168         }
169     }
170 
171     /// Returns true if this error is exclusively an I/O error.
is_io(&self) -> bool172     pub fn is_io(&self) -> bool {
173         match *self {
174             Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
175             Error::WithLineNumber { ref err, .. } => err.is_io(),
176             Error::WithPath { ref err, .. } => err.is_io(),
177             Error::WithDepth { ref err, .. } => err.is_io(),
178             Error::Loop { .. } => false,
179             Error::Io(_) => true,
180             Error::Glob { .. } => false,
181             Error::UnrecognizedFileType(_) => false,
182             Error::InvalidDefinition => false,
183         }
184     }
185 
186     /// Inspect the original [`io::Error`] if there is one.
187     ///
188     /// [`None`] is returned if the [`Error`] doesn't correspond to an
189     /// [`io::Error`]. This might happen, for example, when the error was
190     /// produced because a cycle was found in the directory tree while
191     /// following symbolic links.
192     ///
193     /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To
194     /// obtain an owned value, the [`into_io_error`] can be used instead.
195     ///
196     /// > This is the original [`io::Error`] and is _not_ the same as
197     /// > [`impl From<Error> for std::io::Error`][impl] which contains additional context about the
198     /// error.
199     ///
200     /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None
201     /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
202     /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html
203     /// [`Error`]: struct.Error.html
204     /// [`into_io_error`]: struct.Error.html#method.into_io_error
205     /// [impl]: struct.Error.html#impl-From%3CError%3E
io_error(&self) -> Option<&std::io::Error>206     pub fn io_error(&self) -> Option<&std::io::Error> {
207         match *self {
208             Error::Partial(ref errs) => {
209                 if errs.len() == 1 {
210                     errs[0].io_error()
211                 } else {
212                     None
213                 }
214             }
215             Error::WithLineNumber { ref err, .. } => err.io_error(),
216             Error::WithPath { ref err, .. } => err.io_error(),
217             Error::WithDepth { ref err, .. } => err.io_error(),
218             Error::Loop { .. } => None,
219             Error::Io(ref err) => Some(err),
220             Error::Glob { .. } => None,
221             Error::UnrecognizedFileType(_) => None,
222             Error::InvalidDefinition => None,
223         }
224     }
225 
226     /// Similar to [`io_error`] except consumes self to convert to the original
227     /// [`io::Error`] if one exists.
228     ///
229     /// [`io_error`]: struct.Error.html#method.io_error
230     /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
into_io_error(self) -> Option<std::io::Error>231     pub fn into_io_error(self) -> Option<std::io::Error> {
232         match self {
233             Error::Partial(mut errs) => {
234                 if errs.len() == 1 {
235                     errs.remove(0).into_io_error()
236                 } else {
237                     None
238                 }
239             }
240             Error::WithLineNumber { err, .. } => err.into_io_error(),
241             Error::WithPath { err, .. } => err.into_io_error(),
242             Error::WithDepth { err, .. } => err.into_io_error(),
243             Error::Loop { .. } => None,
244             Error::Io(err) => Some(err),
245             Error::Glob { .. } => None,
246             Error::UnrecognizedFileType(_) => None,
247             Error::InvalidDefinition => None,
248         }
249     }
250 
251     /// Returns a depth associated with recursively walking a directory (if
252     /// this error was generated from a recursive directory iterator).
depth(&self) -> Option<usize>253     pub fn depth(&self) -> Option<usize> {
254         match *self {
255             Error::WithPath { ref err, .. } => err.depth(),
256             Error::WithDepth { depth, .. } => Some(depth),
257             _ => None,
258         }
259     }
260 
261     /// Turn an error into a tagged error with the given file path.
with_path<P: AsRef<Path>>(self, path: P) -> Error262     fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
263         Error::WithPath {
264             path: path.as_ref().to_path_buf(),
265             err: Box::new(self),
266         }
267     }
268 
269     /// Turn an error into a tagged error with the given depth.
with_depth(self, depth: usize) -> Error270     fn with_depth(self, depth: usize) -> Error {
271         Error::WithDepth { depth: depth, err: Box::new(self) }
272     }
273 
274     /// Turn an error into a tagged error with the given file path and line
275     /// number. If path is empty, then it is omitted from the error.
tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error276     fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
277         let errline =
278             Error::WithLineNumber { line: lineno, err: Box::new(self) };
279         if path.as_ref().as_os_str().is_empty() {
280             return errline;
281         }
282         errline.with_path(path)
283     }
284 
285     /// Build an error from a walkdir error.
from_walkdir(err: walkdir::Error) -> Error286     fn from_walkdir(err: walkdir::Error) -> Error {
287         let depth = err.depth();
288         if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) {
289             return Error::WithDepth {
290                 depth: depth,
291                 err: Box::new(Error::Loop {
292                     ancestor: anc.to_path_buf(),
293                     child: child.to_path_buf(),
294                 }),
295             };
296         }
297         let path = err.path().map(|p| p.to_path_buf());
298         let mut ig_err = Error::Io(io::Error::from(err));
299         if let Some(path) = path {
300             ig_err = Error::WithPath { path: path, err: Box::new(ig_err) };
301         }
302         ig_err
303     }
304 }
305 
306 impl error::Error for Error {
307     #[allow(deprecated)]
description(&self) -> &str308     fn description(&self) -> &str {
309         match *self {
310             Error::Partial(_) => "partial error",
311             Error::WithLineNumber { ref err, .. } => err.description(),
312             Error::WithPath { ref err, .. } => err.description(),
313             Error::WithDepth { ref err, .. } => err.description(),
314             Error::Loop { .. } => "file system loop found",
315             Error::Io(ref err) => err.description(),
316             Error::Glob { ref err, .. } => err,
317             Error::UnrecognizedFileType(_) => "unrecognized file type",
318             Error::InvalidDefinition => "invalid definition",
319         }
320     }
321 }
322 
323 impl fmt::Display for Error {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result324     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
325         match *self {
326             Error::Partial(ref errs) => {
327                 let msgs: Vec<String> =
328                     errs.iter().map(|err| err.to_string()).collect();
329                 write!(f, "{}", msgs.join("\n"))
330             }
331             Error::WithLineNumber { line, ref err } => {
332                 write!(f, "line {}: {}", line, err)
333             }
334             Error::WithPath { ref path, ref err } => {
335                 write!(f, "{}: {}", path.display(), err)
336             }
337             Error::WithDepth { ref err, .. } => err.fmt(f),
338             Error::Loop { ref ancestor, ref child } => write!(
339                 f,
340                 "File system loop found: \
341                            {} points to an ancestor {}",
342                 child.display(),
343                 ancestor.display()
344             ),
345             Error::Io(ref err) => err.fmt(f),
346             Error::Glob { glob: None, ref err } => write!(f, "{}", err),
347             Error::Glob { glob: Some(ref glob), ref err } => {
348                 write!(f, "error parsing glob '{}': {}", glob, err)
349             }
350             Error::UnrecognizedFileType(ref ty) => {
351                 write!(f, "unrecognized file type: {}", ty)
352             }
353             Error::InvalidDefinition => write!(
354                 f,
355                 "invalid definition (format is type:glob, e.g., \
356                            html:*.html)"
357             ),
358         }
359     }
360 }
361 
362 impl From<io::Error> for Error {
from(err: io::Error) -> Error363     fn from(err: io::Error) -> Error {
364         Error::Io(err)
365     }
366 }
367 
/// Accumulates errors encountered while an operation keeps going.
///
/// Errors are stored in the order they were pushed. `into_error_option`
/// turns the accumulated errors into zero, one or a `Partial` error.
#[derive(Debug, Default)]
struct PartialErrorBuilder(Vec<Error>);
370 
371 impl PartialErrorBuilder {
push(&mut self, err: Error)372     fn push(&mut self, err: Error) {
373         self.0.push(err);
374     }
375 
push_ignore_io(&mut self, err: Error)376     fn push_ignore_io(&mut self, err: Error) {
377         if !err.is_io() {
378             self.push(err);
379         }
380     }
381 
maybe_push(&mut self, err: Option<Error>)382     fn maybe_push(&mut self, err: Option<Error>) {
383         if let Some(err) = err {
384             self.push(err);
385         }
386     }
387 
maybe_push_ignore_io(&mut self, err: Option<Error>)388     fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
389         if let Some(err) = err {
390             self.push_ignore_io(err);
391         }
392     }
393 
into_error_option(mut self) -> Option<Error>394     fn into_error_option(mut self) -> Option<Error> {
395         if self.0.is_empty() {
396             None
397         } else if self.0.len() == 1 {
398             Some(self.0.pop().unwrap())
399         } else {
400             Some(Error::Partial(self.0))
401         }
402     }
403 }
404 
/// The result of a glob match.
///
/// The type parameter `T` typically refers to a type that provides more
/// information about a particular match. For example, it might identify
/// the specific gitignore file and the specific glob pattern that caused
/// the match.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// The path didn't match any glob.
    None,
    /// The highest-precedence glob that matched indicates the path should be
    /// ignored.
    Ignore(T),
    /// The highest-precedence glob that matched indicates the path should be
    /// whitelisted.
    Whitelist(T),
}
422 
423 impl<T> Match<T> {
424     /// Returns true if the match result didn't match any globs.
is_none(&self) -> bool425     pub fn is_none(&self) -> bool {
426         match *self {
427             Match::None => true,
428             Match::Ignore(_) | Match::Whitelist(_) => false,
429         }
430     }
431 
432     /// Returns true if the match result implies the path should be ignored.
is_ignore(&self) -> bool433     pub fn is_ignore(&self) -> bool {
434         match *self {
435             Match::Ignore(_) => true,
436             Match::None | Match::Whitelist(_) => false,
437         }
438     }
439 
440     /// Returns true if the match result implies the path should be
441     /// whitelisted.
is_whitelist(&self) -> bool442     pub fn is_whitelist(&self) -> bool {
443         match *self {
444             Match::Whitelist(_) => true,
445             Match::None | Match::Ignore(_) => false,
446         }
447     }
448 
449     /// Inverts the match so that `Ignore` becomes `Whitelist` and
450     /// `Whitelist` becomes `Ignore`. A non-match remains the same.
invert(self) -> Match<T>451     pub fn invert(self) -> Match<T> {
452         match self {
453             Match::None => Match::None,
454             Match::Ignore(t) => Match::Whitelist(t),
455             Match::Whitelist(t) => Match::Ignore(t),
456         }
457     }
458 
459     /// Return the value inside this match if it exists.
inner(&self) -> Option<&T>460     pub fn inner(&self) -> Option<&T> {
461         match *self {
462             Match::None => None,
463             Match::Ignore(ref t) => Some(t),
464             Match::Whitelist(ref t) => Some(t),
465         }
466     }
467 
468     /// Apply the given function to the value inside this match.
469     ///
470     /// If the match has no value, then return the match unchanged.
map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U>471     pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
472         match self {
473             Match::None => Match::None,
474             Match::Ignore(t) => Match::Ignore(f(t)),
475             Match::Whitelist(t) => Match::Whitelist(f(t)),
476         }
477     }
478 
479     /// Return the match if it is not none. Otherwise, return other.
or(self, other: Self) -> Self480     pub fn or(self, other: Self) -> Self {
481         if self.is_none() {
482             other
483         } else {
484             self
485         }
486     }
487 }
488 
#[cfg(test)]
mod tests {
    use std::env;
    use std::error;
    use std::fs;
    use std::io;
    use std::path::{Path, PathBuf};
    use std::result;
    use std::thread;

    /// A convenient result type alias.
    pub type Result<T> =
        result::Result<T, Box<dyn error::Error + Send + Sync>>;

    /// Builds a boxed error from a format string, like `format!`.
    macro_rules! err {
        ($($tt:tt)*) => {
            Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*))
        }
    }

    /// A simple wrapper for creating a temporary directory that is
    /// automatically deleted when it's dropped.
    ///
    /// We use this in lieu of tempfile because tempfile brings in too many
    /// dependencies.
    #[derive(Debug)]
    pub struct TempDir(PathBuf);

    impl Drop for TempDir {
        /// Removes the directory and everything inside it.
        ///
        /// A cleanup failure panics so that it is noticed, except when the
        /// thread is already unwinding: a second panic would abort the
        /// process and hide the original test failure.
        fn drop(&mut self) {
            if let Err(e) = fs::remove_dir_all(&self.0) {
                if !thread::panicking() {
                    panic!("failed to remove {}: {}", self.0.display(), e);
                }
            }
        }
    }

    impl TempDir {
        /// Create a new empty temporary directory under the system's configured
        /// temporary directory.
        ///
        /// Candidate names come from a per-process counter. Each candidate is
        /// claimed with a single `create_dir` call, so a name that already
        /// exists (a leftover from an earlier run, or one claimed by another
        /// process at the same time) is skipped rather than shared.
        ///
        /// An error is returned if the parent directory cannot be created, if
        /// creating a candidate fails for any reason other than the name
        /// being taken, or if no free name is found within a fixed number of
        /// tries.
        pub fn new() -> Result<TempDir> {
            use std::sync::atomic::{AtomicUsize, Ordering};

            const TRIES: usize = 100;
            static COUNTER: AtomicUsize = AtomicUsize::new(0);

            let root = env::temp_dir().join("rust-ignore");
            fs::create_dir_all(&root).map_err(|e| {
                err!("failed to create {}: {}", root.display(), e)
            })?;
            for _ in 0..TRIES {
                let count = COUNTER.fetch_add(1, Ordering::SeqCst);
                let path = root.join(count.to_string());
                // `create_dir` fails if the path already exists, which makes
                // "check and claim" one atomic step.
                match fs::create_dir(&path) {
                    Ok(()) => return Ok(TempDir(path)),
                    Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
                        continue;
                    }
                    Err(e) => {
                        return Err(err!(
                            "failed to create {}: {}",
                            path.display(),
                            e
                        ));
                    }
                }
            }
            Err(err!("failed to create temp dir after {} tries", TRIES))
        }

        /// Return the underlying path to this temporary directory.
        pub fn path(&self) -> &Path {
            &self.0
        }
    }
}
551