1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! Support for matching file paths against Unix shell style patterns.
12 //!
13 //! The `glob` and `glob_with` functions, in concert with the `Paths`
14 //! type, allow querying the filesystem for all files that match a particular
15 //! pattern - just like the libc `glob` function (for an example see the `glob`
16 //! documentation). The methods on the `Pattern` type provide functionality
17 //! for checking if individual paths match a particular pattern - in a similar
//! manner to the libc `fnmatch` function.
19 //! For consistency across platforms, and for Windows support, this module
20 //! is implemented entirely in Rust rather than deferring to the libc
21 //! `glob`/`fnmatch` functions.
22
23 #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
24 html_favicon_url = "https://www.rust-lang.org/favicon.ico",
25 html_root_url = "https://doc.rust-lang.org/glob/")]
26 #![cfg_attr(all(test, windows), feature(std_misc))]
27
28 use std::ascii::AsciiExt;
29 use std::cmp;
30 use std::fmt;
31 use std::fs;
32 use std::io::prelude::*;
33 use std::io;
34 use std::path::{self, Path, PathBuf, Component};
35 use std::str::FromStr;
36 use std::error::Error;
37
38 use PatternToken::{Char, AnyChar, AnySequence, AnyRecursiveSequence, AnyWithin};
39 use PatternToken::AnyExcept;
40 use CharSpecifier::{SingleChar, CharRange};
41 use MatchResult::{Match, SubPatternDoesntMatch, EntirePatternDoesntMatch};
42
/// An iterator that yields `Path`s from the filesystem that match a particular
/// pattern.
///
/// Note that it yields `GlobResult` in order to report any `IoErrors` that may
/// arise during iteration. If a directory matches but is unreadable,
/// thereby preventing its contents from being checked for matches, a
/// `GlobError` is returned to express this.
///
/// See the `glob` function for more details.
pub struct Paths {
    // One compiled pattern per separator-delimited component of the glob.
    dir_patterns: Vec<Pattern>,
    // True when the glob text ended with a path separator, in which case only
    // directories are yielded.
    require_dir: bool,
    // Match options applied whenever a path component is compared against a
    // pattern.
    options: MatchOptions,
    // Stack of pending work. Each `Ok` entry pairs a candidate path with the
    // index of the pattern it still has to be matched against (`!0` marks a
    // path that has already been matched); each `Err` entry is an error that
    // is waiting to be reported to the caller.
    todo: Vec<Result<(PathBuf, usize), GlobError>>,
    // Directory the walk starts from. It is taken (leaving `None`) by the
    // first call to `next`, which uses it to seed `todo`.
    scope: Option<PathBuf>,
}
59
60 /// Return an iterator that produces all the Paths that match the given pattern,
61 /// which may be absolute or relative to the current working directory.
62 ///
63 /// This may return an error if the pattern is invalid.
64 ///
65 /// This method uses the default match options and is equivalent to calling
66 /// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you
67 /// want to use non-default match options.
68 ///
69 /// When iterating, each result is a `GlobResult` which expresses the
70 /// possibility that there was an `IoError` when attempting to read the contents
71 /// of the matched path. In other words, each item returned by the iterator
72 /// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if
73 /// the path (partially) matched _but_ its contents could not be read in order
74 /// to determine if its contents matched.
75 ///
76 /// See the `Paths` documentation for more information.
77 ///
78 /// # Example
79 ///
80 /// Consider a directory `/media/pictures` containing only the files
81 /// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`:
82 ///
83 /// ```rust
84 /// use glob::glob;
85 ///
86 /// for entry in glob("/media/pictures/*.jpg").unwrap() {
87 /// match entry {
88 /// Ok(path) => println!("{:?}", path.display()),
89 ///
90 /// // if the path matched but was unreadable,
91 /// // thereby preventing its contents from matching
92 /// Err(e) => println!("{:?}", e),
93 /// }
94 /// }
95 /// ```
96 ///
97 /// The above code will print:
98 ///
99 /// ```ignore
100 /// /media/pictures/kittens.jpg
101 /// /media/pictures/puppies.jpg
102 /// ```
103 ///
104 /// If you want to ignore unreadable paths, you can use something like
105 /// `filter_map`:
106 ///
107 /// ```rust
108 /// use glob::glob;
109 /// use std::result::Result;
110 ///
111 /// for path in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) {
112 /// println!("{}", path.display());
113 /// }
114 /// ```
115 ///
glob(pattern: &str) -> Result<Paths, PatternError>116 pub fn glob(pattern: &str) -> Result<Paths, PatternError> {
117 glob_with(pattern, &MatchOptions::new())
118 }
119
120 /// Return an iterator that produces all the Paths that match the given pattern,
121 /// which may be absolute or relative to the current working directory.
122 ///
123 /// This may return an error if the pattern is invalid.
124 ///
125 /// This function accepts Unix shell style patterns as described by
126 /// `Pattern::new(..)`. The options given are passed through unchanged to
127 /// `Pattern::matches_with(..)` with the exception that
128 /// `require_literal_separator` is always set to `true` regardless of the value
129 /// passed to this function.
130 ///
131 /// Paths are yielded in alphabetical order.
glob_with(pattern: &str, options: &MatchOptions) -> Result<Paths, PatternError>132 pub fn glob_with(pattern: &str, options: &MatchOptions) -> Result<Paths, PatternError> {
133 // make sure that the pattern is valid first, else early return with error
134 let _compiled = try!(Pattern::new(pattern));
135
136 #[cfg(windows)]
137 fn check_windows_verbatim(p: &Path) -> bool {
138 use std::path::Prefix;
139 match p.components().next() {
140 Some(Component::Prefix(ref p)) => p.kind().is_verbatim(),
141 _ => false,
142 }
143 }
144 #[cfg(not(windows))]
145 fn check_windows_verbatim(_: &Path) -> bool {
146 false
147 }
148
149 #[cfg(windows)]
150 fn to_scope(p: &Path) -> PathBuf {
151 // FIXME handle volume relative paths here
152 p.to_path_buf()
153 }
154 #[cfg(not(windows))]
155 fn to_scope(p: &Path) -> PathBuf {
156 p.to_path_buf()
157 }
158
159 let mut components = Path::new(pattern).components().peekable();
160 loop {
161 match components.peek() {
162 Some(&Component::Prefix(..)) |
163 Some(&Component::RootDir) => {
164 components.next();
165 }
166 _ => break,
167 }
168 }
169 let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>();
170 let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>();
171 let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len();
172 let root = if root_len > 0 {
173 Some(Path::new(&pattern[..root_len]))
174 } else {
175 None
176 };
177
178 if root_len > 0 && check_windows_verbatim(root.unwrap()) {
179 // FIXME: How do we want to handle verbatim paths? I'm inclined to
180 // return nothing, since we can't very well find all UNC shares with a
181 // 1-letter server name.
182 return Ok(Paths {
183 dir_patterns: Vec::new(),
184 require_dir: false,
185 options: options.clone(),
186 todo: Vec::new(),
187 scope: None,
188 });
189 }
190
191 let scope = root.map(to_scope).unwrap_or_else(|| PathBuf::from("."));
192
193 let mut dir_patterns = Vec::new();
194 let components = pattern[cmp::min(root_len, pattern.len())..]
195 .split_terminator(path::is_separator);
196
197 for component in components {
198 let compiled = try!(Pattern::new(component));
199 dir_patterns.push(compiled);
200 }
201
202 if root_len == pattern.len() {
203 dir_patterns.push(Pattern {
204 original: "".to_string(),
205 tokens: Vec::new(),
206 is_recursive: false,
207 });
208 }
209
210 let require_dir = pattern.chars().next_back().map(path::is_separator) == Some(true);
211 let todo = Vec::new();
212
213 Ok(Paths {
214 dir_patterns: dir_patterns,
215 require_dir: require_dir,
216 options: options.clone(),
217 todo: todo,
218 scope: Some(scope),
219 })
220 }
221
/// A glob iteration error.
///
/// This is typically returned when a particular path cannot be read
/// to determine if its contents match the glob pattern. This is possible
/// if the program lacks the permissions, for example.
#[derive(Debug)]
pub struct GlobError {
    // The path whose contents could not be read.
    path: PathBuf,
    // The underlying I/O error that was reported for `path`.
    error: io::Error,
}
232
233 impl GlobError {
234 /// The Path that the error corresponds to.
path(&self) -> &Path235 pub fn path(&self) -> &Path {
236 &self.path
237 }
238
239 /// The error in question.
error(&self) -> &io::Error240 pub fn error(&self) -> &io::Error {
241 &self.error
242 }
243 }
244
245 impl Error for GlobError {
description(&self) -> &str246 fn description(&self) -> &str {
247 self.error.description()
248 }
cause(&self) -> Option<&Error>249 fn cause(&self) -> Option<&Error> {
250 Some(&self.error)
251 }
252 }
253
254 impl fmt::Display for GlobError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result255 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
256 write!(f,
257 "attempting to read `{}` resulted in an error: {}",
258 self.path.display(),
259 self.error)
260 }
261 }
262
// Reports whether `p` names a directory. Any failure to read the metadata
// (for example because the path does not exist) counts as "not a directory".
fn is_dir(p: &Path) -> bool {
    match fs::metadata(p) {
        Ok(meta) => meta.is_dir(),
        Err(_) => false,
    }
}
266
/// An alias for a glob iteration result.
///
/// This represents either a matched path (`Ok(PathBuf)`) or a glob iteration
/// error (`Err(GlobError)`), such as failing to read a particular directory's
/// contents. It is the item type yielded by the `Paths` iterator.
pub type GlobResult = Result<PathBuf, GlobError>;
272
impl Iterator for Paths {
    type Item = GlobResult;

    // Pops pending (path, pattern index) entries off the `todo` stack until one
    // of them turns out to be a complete match (returned as `Ok`), an error
    // entry is found (returned as `Err`), or the stack runs dry (`None`).
    fn next(&mut self) -> Option<GlobResult> {
        // The todo buffer hasn't been initialized yet, so it's done at this
        // point rather than in glob() so that the errors are unified. That is,
        // failing to fill the buffer is an iteration error; construction of the
        // iterator (i.e. glob()) only fails if it fails to compile the Pattern.
        if let Some(scope) = self.scope.take() {
            if self.dir_patterns.len() > 0 {
                // Shouldn't happen, but we're using -1 as a special index.
                assert!(self.dir_patterns.len() < !0 as usize);

                fill_todo(&mut self.todo, &self.dir_patterns, 0, &scope, &self.options);
            }
        }

        loop {
            if self.dir_patterns.is_empty() || self.todo.is_empty() {
                return None;
            }

            let (path, mut idx) = match self.todo.pop().unwrap() {
                Ok(pair) => pair,
                Err(e) => return Some(Err(e)),
            };

            // idx -1: was already checked by fill_todo, maybe path was '.' or
            // '..' that we can't match here because of normalization.
            if idx == !0 as usize {
                if self.require_dir && !is_dir(&path) {
                    continue;
                }
                return Some(Ok(path));
            }

            if self.dir_patterns[idx].is_recursive {
                let mut next = idx;

                // collapse consecutive recursive patterns
                while (next + 1) < self.dir_patterns.len() &&
                      self.dir_patterns[next + 1].is_recursive {
                    next += 1;
                }

                if is_dir(&path) {
                    // the path is a directory, so it's a match

                    // push this directory's contents
                    fill_todo(&mut self.todo,
                              &self.dir_patterns,
                              next,
                              &path,
                              &self.options);

                    if next == self.dir_patterns.len() - 1 {
                        // pattern ends in recursive pattern, so return this
                        // directory as a result
                        return Some(Ok(path));
                    } else {
                        // advanced to the next pattern for this path
                        idx = next + 1;
                    }
                } else if next != self.dir_patterns.len() - 1 {
                    // advanced to the next pattern for this path
                    idx = next + 1;
                } else {
                    // not a directory and it's the last pattern, meaning no match
                    continue;
                }
            }

            // not recursive, so match normally
            if self.dir_patterns[idx].matches_with({
                match path.file_name().and_then(|s| s.to_str()) {
                    // FIXME (#9639): How do we handle non-utf8 filenames?
                    // Ignore them for now. Ideally we'd still match them
                    // against a `*`.
                    None => continue,
                    Some(x) => x
                }
            }, &self.options) {
                if idx == self.dir_patterns.len() - 1 {
                    // it is not possible for a pattern to match a directory
                    // *AND* its children so we don't need to check the
                    // children

                    if !self.require_dir || is_dir(&path) {
                        return Some(Ok(path));
                    }
                } else {
                    // more components remain: queue this path's children for
                    // matching against the next pattern
                    fill_todo(&mut self.todo, &self.dir_patterns,
                              idx + 1, &path, &self.options);
                }
            }
        }
    }
}
371
/// A pattern parsing error.
///
/// Returned by `Pattern::new` (and therefore by `glob` / `glob_with`) when the
/// pattern text is not a valid glob.
#[derive(Debug)]
#[allow(missing_copy_implementations)]
pub struct PatternError {
    /// The approximate character index of where the error occurred.
    pub pos: usize,

    /// A message describing the error.
    pub msg: &'static str,
}
382
383 impl Error for PatternError {
description(&self) -> &str384 fn description(&self) -> &str {
385 self.msg
386 }
387 }
388
389 impl fmt::Display for PatternError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result390 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
391 write!(f,
392 "Pattern syntax error near position {}: {}",
393 self.pos,
394 self.msg)
395 }
396 }
397
/// A compiled Unix shell style pattern.
///
/// `?` matches any single character
///
/// `*` matches any (possibly empty) sequence of characters
///
/// `**` matches the current directory and arbitrary subdirectories. This
/// sequence **must** form a single path component, so both `**a` and `b**` are
/// invalid and will result in an error. A sequence of more than two
/// consecutive `*` characters is also invalid.
///
/// `[...]` matches any character inside the brackets.
/// Character sequences can also specify ranges
/// of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any
/// character between 0 and 9 inclusive. An unclosed bracket is invalid.
///
/// `[!...]` is the negation of `[...]`, i.e. it matches any characters **not**
/// in the brackets.
///
/// The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
/// (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then
/// it is interpreted as being part of, rather than ending, the character
/// set, so `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively.
/// The `-` character can be specified inside a character sequence pattern by
/// placing it at the start or the end, e.g. `[abc-]`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub struct Pattern {
    // The pattern text exactly as it was supplied to `Pattern::new`.
    original: String,
    // The compiled form of `original`, one token per pattern element.
    tokens: Vec<PatternToken>,
    // True when the pattern contains a recursive `**` wildcard.
    is_recursive: bool,
}
429
430 /// Show the original glob pattern.
431 impl fmt::Display for Pattern {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result432 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
433 self.original.fmt(f)
434 }
435 }
436
437 impl FromStr for Pattern {
438 type Err = PatternError;
439
from_str(s: &str) -> Result<Pattern, PatternError>440 fn from_str(s: &str) -> Result<Pattern, PatternError> {
441 Pattern::new(s)
442 }
443 }
444
// One element of a compiled `Pattern`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum PatternToken {
    // A literal character.
    Char(char),
    // `?`: any single character.
    AnyChar,
    // `*`: any (possibly empty) sequence of characters.
    AnySequence,
    // `**`: the recursive wildcard.
    AnyRecursiveSequence,
    // `[...]`: any character covered by the listed specifiers.
    AnyWithin(Vec<CharSpecifier>),
    // `[!...]`: any character NOT covered by the listed specifiers.
    AnyExcept(Vec<CharSpecifier>),
}
454
// One entry inside a bracket expression (`[...]` or `[!...]`).
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum CharSpecifier {
    // A single character.
    SingleChar(char),
    // An inclusive range written as `start-end`.
    CharRange(char, char),
}
460
// Outcome of one `Pattern::matches_from` attempt.
#[derive(Copy, Clone, PartialEq)]
enum MatchResult {
    // The remaining tokens consumed the remaining input exactly.
    Match,
    // This attempt failed, but a wildcard further up the call chain keeps
    // trying other expansions.
    SubPatternDoesntMatch,
    // The input ran out before a non-wildcard token could be matched; enclosing
    // wildcard attempts propagate this result instead of retrying.
    EntirePatternDoesntMatch,
}
467
// Message used when more than two consecutive `*` characters are found.
const ERROR_WILDCARDS: &'static str = "wildcards are either regular `*` or recursive `**`";
// Message used when `**` is not an entire path component.
const ERROR_RECURSIVE_WILDCARDS: &'static str = "recursive wildcards must form a single path \
                                                 component";
// Message used when a `[` does not start a well-formed bracket expression.
const ERROR_INVALID_RANGE: &'static str = "invalid range pattern";
472
473 impl Pattern {
474 /// This function compiles Unix shell style patterns.
475 ///
476 /// An invalid glob pattern will yield an error.
new(pattern: &str) -> Result<Pattern, PatternError>477 pub fn new(pattern: &str) -> Result<Pattern, PatternError> {
478
479 let chars = pattern.chars().collect::<Vec<_>>();
480 let mut tokens = Vec::new();
481 let mut is_recursive = false;
482 let mut i = 0;
483
484 while i < chars.len() {
485 match chars[i] {
486 '?' => {
487 tokens.push(AnyChar);
488 i += 1;
489 }
490 '*' => {
491 let old = i;
492
493 while i < chars.len() && chars[i] == '*' {
494 i += 1;
495 }
496
497 let count = i - old;
498
499 if count > 2 {
500 return Err(PatternError {
501 pos: old + 2,
502 msg: ERROR_WILDCARDS,
503 });
504 } else if count == 2 {
505 // ** can only be an entire path component
506 // i.e. a/**/b is valid, but a**/b or a/**b is not
507 // invalid matches are treated literally
508 let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) {
509 // it ends in a '/'
510 if i < chars.len() && path::is_separator(chars[i]) {
511 i += 1;
512 true
513 // or the pattern ends here
514 // this enables the existing globbing mechanism
515 } else if i == chars.len() {
516 true
517 // `**` ends in non-separator
518 } else {
519 return Err(PatternError {
520 pos: i,
521 msg: ERROR_RECURSIVE_WILDCARDS,
522 });
523 }
524 // `**` begins with non-separator
525 } else {
526 return Err(PatternError {
527 pos: old - 1,
528 msg: ERROR_RECURSIVE_WILDCARDS,
529 });
530 };
531
532 let tokens_len = tokens.len();
533
534 if is_valid {
535 // collapse consecutive AnyRecursiveSequence to a
536 // single one
537 if !(tokens_len > 1 && tokens[tokens_len - 1] == AnyRecursiveSequence) {
538 is_recursive = true;
539 tokens.push(AnyRecursiveSequence);
540 }
541 }
542 } else {
543 tokens.push(AnySequence);
544 }
545 }
546 '[' => {
547
548 if i + 4 <= chars.len() && chars[i + 1] == '!' {
549 match chars[i + 3..].iter().position(|x| *x == ']') {
550 None => (),
551 Some(j) => {
552 let chars = &chars[i + 2..i + 3 + j];
553 let cs = parse_char_specifiers(chars);
554 tokens.push(AnyExcept(cs));
555 i += j + 4;
556 continue;
557 }
558 }
559 } else if i + 3 <= chars.len() && chars[i + 1] != '!' {
560 match chars[i + 2..].iter().position(|x| *x == ']') {
561 None => (),
562 Some(j) => {
563 let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]);
564 tokens.push(AnyWithin(cs));
565 i += j + 3;
566 continue;
567 }
568 }
569 }
570
571 // if we get here then this is not a valid range pattern
572 return Err(PatternError {
573 pos: i,
574 msg: ERROR_INVALID_RANGE,
575 });
576 }
577 c => {
578 tokens.push(Char(c));
579 i += 1;
580 }
581 }
582 }
583
584 Ok(Pattern {
585 tokens: tokens,
586 original: pattern.to_string(),
587 is_recursive: is_recursive,
588 })
589 }
590
591 /// Escape metacharacters within the given string by surrounding them in
592 /// brackets. The resulting string will, when compiled into a `Pattern`,
593 /// match the input string and nothing else.
escape(s: &str) -> String594 pub fn escape(s: &str) -> String {
595 let mut escaped = String::new();
596 for c in s.chars() {
597 match c {
598 // note that ! does not need escaping because it is only special
599 // inside brackets
600 '?' | '*' | '[' | ']' => {
601 escaped.push('[');
602 escaped.push(c);
603 escaped.push(']');
604 }
605 c => {
606 escaped.push(c);
607 }
608 }
609 }
610 escaped
611 }
612
613 /// Return if the given `str` matches this `Pattern` using the default
614 /// match options (i.e. `MatchOptions::new()`).
615 ///
616 /// # Example
617 ///
618 /// ```rust
619 /// use glob::Pattern;
620 ///
621 /// assert!(Pattern::new("c?t").unwrap().matches("cat"));
622 /// assert!(Pattern::new("k[!e]tteh").unwrap().matches("kitteh"));
623 /// assert!(Pattern::new("d*g").unwrap().matches("doog"));
624 /// ```
matches(&self, str: &str) -> bool625 pub fn matches(&self, str: &str) -> bool {
626 self.matches_with(str, &MatchOptions::new())
627 }
628
629 /// Return if the given `Path`, when converted to a `str`, matches this
630 /// `Pattern` using the default match options (i.e. `MatchOptions::new()`).
matches_path(&self, path: &Path) -> bool631 pub fn matches_path(&self, path: &Path) -> bool {
632 // FIXME (#9639): This needs to handle non-utf8 paths
633 path.to_str().map_or(false, |s| self.matches(s))
634 }
635
636 /// Return if the given `str` matches this `Pattern` using the specified
637 /// match options.
matches_with(&self, str: &str, options: &MatchOptions) -> bool638 pub fn matches_with(&self, str: &str, options: &MatchOptions) -> bool {
639 self.matches_from(true, str.chars(), 0, options) == Match
640 }
641
642 /// Return if the given `Path`, when converted to a `str`, matches this
643 /// `Pattern` using the specified match options.
matches_path_with(&self, path: &Path, options: &MatchOptions) -> bool644 pub fn matches_path_with(&self, path: &Path, options: &MatchOptions) -> bool {
645 // FIXME (#9639): This needs to handle non-utf8 paths
646 path.to_str().map_or(false, |s| self.matches_with(s, options))
647 }
648
649 /// Access the original glob pattern.
as_str<'a>(&'a self) -> &'a str650 pub fn as_str<'a>(&'a self) -> &'a str {
651 &self.original
652 }
653
matches_from(&self, mut follows_separator: bool, mut file: std::str::Chars, i: usize, options: &MatchOptions) -> MatchResult654 fn matches_from(&self,
655 mut follows_separator: bool,
656 mut file: std::str::Chars,
657 i: usize,
658 options: &MatchOptions)
659 -> MatchResult {
660
661 for (ti, token) in self.tokens[i..].iter().enumerate() {
662 match *token {
663 AnySequence | AnyRecursiveSequence => {
664 // ** must be at the start.
665 debug_assert!(match *token {
666 AnyRecursiveSequence => follows_separator,
667 _ => true,
668 });
669
670 // Empty match
671 match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) {
672 SubPatternDoesntMatch => (), // keep trying
673 m => return m,
674 };
675
676 while let Some(c) = file.next() {
677 if follows_separator && options.require_literal_leading_dot && c == '.' {
678 return SubPatternDoesntMatch;
679 }
680 follows_separator = path::is_separator(c);
681 match *token {
682 AnyRecursiveSequence if !follows_separator => continue,
683 AnySequence if options.require_literal_separator &&
684 follows_separator => return SubPatternDoesntMatch,
685 _ => (),
686 }
687 match self.matches_from(follows_separator,
688 file.clone(),
689 i + ti + 1,
690 options) {
691 SubPatternDoesntMatch => (), // keep trying
692 m => return m,
693 }
694 }
695 }
696 _ => {
697 let c = match file.next() {
698 Some(c) => c,
699 None => return EntirePatternDoesntMatch,
700 };
701
702 let is_sep = path::is_separator(c);
703
704 if !match *token {
705 AnyChar | AnyWithin(..) | AnyExcept(..)
706 if (options.require_literal_separator && is_sep) ||
707 (follows_separator && options.require_literal_leading_dot &&
708 c == '.') => false,
709 AnyChar => true,
710 AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options),
711 AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options),
712 Char(c2) => chars_eq(c, c2, options.case_sensitive),
713 AnySequence | AnyRecursiveSequence => unreachable!(),
714 } {
715 return SubPatternDoesntMatch;
716 }
717 follows_separator = is_sep;
718 }
719 }
720 }
721
722 // Iter is fused.
723 if file.next().is_none() {
724 Match
725 } else {
726 SubPatternDoesntMatch
727 }
728 }
729 }
730
731 // Fills `todo` with paths under `path` to be matched by `patterns[idx]`,
732 // special-casing patterns to match `.` and `..`, and avoiding `readdir()`
733 // calls when there are no metacharacters in the pattern.
fill_todo(todo: &mut Vec<Result<(PathBuf, usize), GlobError>>, patterns: &[Pattern], idx: usize, path: &Path, options: &MatchOptions)734 fn fill_todo(todo: &mut Vec<Result<(PathBuf, usize), GlobError>>,
735 patterns: &[Pattern],
736 idx: usize,
737 path: &Path,
738 options: &MatchOptions) {
739 // convert a pattern that's just many Char(_) to a string
740 fn pattern_as_str(pattern: &Pattern) -> Option<String> {
741 let mut s = String::new();
742 for token in pattern.tokens.iter() {
743 match *token {
744 Char(c) => s.push(c),
745 _ => return None,
746 }
747 }
748 return Some(s);
749 }
750
751 let add = |todo: &mut Vec<_>, next_path: PathBuf| {
752 if idx + 1 == patterns.len() {
753 // We know it's good, so don't make the iterator match this path
754 // against the pattern again. In particular, it can't match
755 // . or .. globs since these never show up as path components.
756 todo.push(Ok((next_path, !0 as usize)));
757 } else {
758 fill_todo(todo, patterns, idx + 1, &next_path, options);
759 }
760 };
761
762 let pattern = &patterns[idx];
763 let is_dir = is_dir(path);
764 let curdir = path == Path::new(".");
765 match pattern_as_str(pattern) {
766 Some(s) => {
767 // This pattern component doesn't have any metacharacters, so we
768 // don't need to read the current directory to know where to
769 // continue. So instead of passing control back to the iterator,
770 // we can just check for that one entry and potentially recurse
771 // right away.
772 let special = "." == s || ".." == s;
773 let next_path = if curdir {
774 PathBuf::from(s)
775 } else {
776 path.join(&s)
777 };
778 if (special && is_dir) || (!special && fs::metadata(&next_path).is_ok()) {
779 add(todo, next_path);
780 }
781 }
782 None if is_dir => {
783 let dirs = fs::read_dir(path).and_then(|d| {
784 d.map(|e| {
785 e.map(|e| {
786 if curdir {
787 PathBuf::from(e.path().file_name().unwrap())
788 } else {
789 e.path()
790 }
791 })
792 })
793 .collect::<Result<Vec<_>, _>>()
794 });
795 match dirs {
796 Ok(mut children) => {
797 children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name()));
798 todo.extend(children.into_iter().map(|x| Ok((x, idx))));
799
800 // Matching the special directory entries . and .. that
801 // refer to the current and parent directory respectively
802 // requires that the pattern has a leading dot, even if the
803 // `MatchOptions` field `require_literal_leading_dot` is not
804 // set.
805 if pattern.tokens.len() > 0 && pattern.tokens[0] == Char('.') {
806 for &special in [".", ".."].iter() {
807 if pattern.matches_with(special, options) {
808 add(todo, path.join(special));
809 }
810 }
811 }
812 }
813 Err(e) => {
814 todo.push(Err(GlobError {
815 path: path.to_path_buf(),
816 error: e,
817 }));
818 }
819 }
820 }
821 None => {
822 // not a directory, nothing more to find
823 }
824 }
825 }
826
parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier>827 fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> {
828 let mut cs = Vec::new();
829 let mut i = 0;
830 while i < s.len() {
831 if i + 3 <= s.len() && s[i + 1] == '-' {
832 cs.push(CharRange(s[i], s[i + 2]));
833 i += 3;
834 } else {
835 cs.push(SingleChar(s[i]));
836 i += 1;
837 }
838 }
839 cs
840 }
841
in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: &MatchOptions) -> bool842 fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: &MatchOptions) -> bool {
843
844 for &specifier in specifiers.iter() {
845 match specifier {
846 SingleChar(sc) => {
847 if chars_eq(c, sc, options.case_sensitive) {
848 return true;
849 }
850 }
851 CharRange(start, end) => {
852
853 // FIXME: work with non-ascii chars properly (issue #1347)
854 if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
855
856 let start = start.to_ascii_lowercase();
857 let end = end.to_ascii_lowercase();
858
859 let start_up = start.to_uppercase().next().unwrap();
860 let end_up = end.to_uppercase().next().unwrap();
861
862 // only allow case insensitive matching when
863 // both start and end are within a-z or A-Z
864 if start != start_up && end != end_up {
865 let c = c.to_ascii_lowercase();
866 if c >= start && c <= end {
867 return true;
868 }
869 }
870 }
871
872 if c >= start && c <= end {
873 return true;
874 }
875 }
876 }
877 }
878
879 false
880 }
881
/// Compares two chars for equality, optionally ignoring ASCII case.
///
/// On Windows any two path separator characters compare equal.
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
    let both_separators = cfg!(windows) && path::is_separator(a) && path::is_separator(b);
    if both_separators {
        return true;
    }

    // FIXME: work with non-ascii chars properly (issue #9084)
    let fold_ascii = !case_sensitive && a.is_ascii() && b.is_ascii();
    if fold_ascii {
        a.to_ascii_lowercase() == b.to_ascii_lowercase()
    } else {
        a == b
    }
}
893
894
/// Configuration options to modify the behaviour of `Pattern::matches_with(..)`
///
/// Note that the derived `Default` implementation sets every field to `false`,
/// which differs from `MatchOptions::new()` (where `case_sensitive` is
/// `true`).
#[allow(missing_copy_implementations)]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct MatchOptions {
    /// Whether or not patterns should be matched in a case-sensitive manner.
    /// This currently only considers upper/lower case relationships between
    /// ASCII characters, but in future this might be extended to work with
    /// Unicode.
    pub case_sensitive: bool,

    /// If this is true then path-component separator characters (e.g. `/` on
    /// Posix) must be matched by a literal `/`, rather than by `*` or `?` or
    /// `[...]`
    pub require_literal_separator: bool,

    /// If this is true then paths that contain components that start with a `.`
    /// will not match unless the `.` appears literally in the pattern: `*`, `?`, `**`,
    /// or `[...]` will not match. This is useful because such files are
    /// conventionally considered hidden on Unix systems and it might be
    /// desirable to skip them when listing files.
    pub require_literal_leading_dot: bool,
}
917
918 impl MatchOptions {
919 /// Constructs a new `MatchOptions` with default field values. This is used
920 /// when calling functions that do not take an explicit `MatchOptions`
921 /// parameter.
922 ///
923 /// This function always returns this value:
924 ///
925 /// ```rust,ignore
926 /// MatchOptions {
927 /// case_sensitive: true,
928 /// require_literal_separator: false.
929 /// require_literal_leading_dot: false
930 /// }
931 /// ```
new() -> MatchOptions932 pub fn new() -> MatchOptions {
933 MatchOptions {
934 case_sensitive: true,
935 require_literal_separator: false,
936 require_literal_leading_dot: false,
937 }
938 }
939 }
940
941 #[cfg(test)]
942 mod test {
943 use std::path::Path;
944 use super::{glob, Pattern, MatchOptions};
945
946 #[test]
test_pattern_from_str()947 fn test_pattern_from_str() {
948 assert!("a*b".parse::<Pattern>().unwrap().matches("a_b"));
949 assert!("a/**b".parse::<Pattern>().unwrap_err().pos == 4);
950 }
951
952 #[test]
test_wildcard_errors()953 fn test_wildcard_errors() {
954 assert!(Pattern::new("a/**b").unwrap_err().pos == 4);
955 assert!(Pattern::new("a/bc**").unwrap_err().pos == 3);
956 assert!(Pattern::new("a/*****").unwrap_err().pos == 4);
957 assert!(Pattern::new("a/b**c**d").unwrap_err().pos == 2);
958 assert!(Pattern::new("a**b").unwrap_err().pos == 0);
959 }
960
961 #[test]
test_unclosed_bracket_errors()962 fn test_unclosed_bracket_errors() {
963 assert!(Pattern::new("abc[def").unwrap_err().pos == 3);
964 assert!(Pattern::new("abc[!def").unwrap_err().pos == 3);
965 assert!(Pattern::new("abc[").unwrap_err().pos == 3);
966 assert!(Pattern::new("abc[!").unwrap_err().pos == 3);
967 assert!(Pattern::new("abc[d").unwrap_err().pos == 3);
968 assert!(Pattern::new("abc[!d").unwrap_err().pos == 3);
969 assert!(Pattern::new("abc[]").unwrap_err().pos == 3);
970 assert!(Pattern::new("abc[!]").unwrap_err().pos == 3);
971 }
972
973 #[test]
test_glob_errors()974 fn test_glob_errors() {
975 assert!(glob("a/**b").err().unwrap().pos == 4);
976 assert!(glob("abc[def").err().unwrap().pos == 3);
977 }
978
// This test needs a /root directory whose contents the current user is not
// allowed to list. That precondition is checked first; when it does not hold
// (running as root, or no /root on this system) the test returns early
// instead of failing for reasons unrelated to the code under test.
#[cfg(unix)]
#[test]
fn test_iteration_errors() {
    use std::fs;
    use std::io;

    // Probe the environment directly through std so the guard does not depend
    // on the code being tested.
    match fs::read_dir("/root") {
        Err(ref e) if e.kind() == io::ErrorKind::PermissionDenied => {}
        _ => return,
    }

    let mut iter = glob("/root/*").unwrap();

    // GlobErrors shouldn't halt iteration
    let next = iter.next();
    assert!(next.is_some());

    let err = next.unwrap();
    assert!(err.is_err());

    // The error must name the unreadable directory and keep the underlying
    // I/O error kind; `assert_eq!` prints the actual values on failure.
    let err = err.err().unwrap();
    assert_eq!(err.path(), Path::new("/root"));
    assert_eq!(err.error().kind(), io::ErrorKind::PermissionDenied);
}
999
// Absolute patterns must yield at least one entry.
#[test]
fn test_absolute_pattern() {
    // Extra check that only exists on Windows; a no-op elsewhere.
    #[cfg(not(windows))]
    fn check_device_root() {}

    #[cfg(windows)]
    fn check_device_root() {
        use std::env::current_dir;
        use std::ffi::AsOsStr;

        // check windows absolute paths with host/device components
        // NOTE(review): `AsOsStr` and `Path::prefix` look like pre-1.0 APIs
        // (cf. the crate-level `feature(std_misc)` gate for Windows tests);
        // confirm this branch still builds on the supported toolchain.
        let working_dir = current_dir().ok();
        let device_glob = working_dir
            .and_then(|dir| dir.prefix().map(|prefix| prefix.join("*")))
            .unwrap();
        // FIXME (#9639): This needs to handle non-utf8 paths
        let device_glob = device_glob.as_os_str().to_str().unwrap();
        assert!(glob(device_glob).unwrap().next().is_some());
    }

    // The last entry assumes that the filesystem is not empty!
    for &pattern in ["/", "//", "/*"].iter() {
        let mut entries = glob(pattern).unwrap();
        assert!(entries.next().is_some());
    }

    check_device_root()
}
1026
// `*` wildcards, alone and combined with literals and a bracket class,
// against strings that must or must not match.
#[test]
fn test_wildcards() {
    // Compiles `pattern` and reports whether it matches `text`.
    let is_match = |pattern: &str, text: &str| Pattern::new(pattern).unwrap().matches(text);

    assert!(is_match("a*b", "a_b"));
    assert!(is_match("a*b*c", "abc"));
    assert!(!is_match("a*b*c", "abcd"));
    assert!(is_match("a*b*c", "a_b_c"));
    assert!(is_match("a*b*c", "a___b___c"));
    assert!(is_match("abc*abc*abc", "abcabcabcabcabcabcabc"));
    assert!(!is_match("abc*abc*abc", "abcabcabcabcabcabcabca"));
    assert!(is_match("a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
    assert!(is_match("a*b[xyz]c*d", "abxcdbxcddd"));
}
1041
// Behaviour of the recursive `**` wildcard in several positions of a pattern.
#[test]
fn test_recursive_wildcards() {
    // Paths with `needle.txt` zero, one or two directories below `some/`.
    let needle_paths = [
        "some/needle.txt",
        "some/one/needle.txt",
        "some/one/two/needle.txt",
        "some/other/needle.txt",
    ];

    let needle = Pattern::new("some/**/needle.txt").unwrap();
    for &path in needle_paths.iter() {
        assert!(needle.matches(path));
    }
    assert!(!needle.matches("some/other/notthis.txt"));

    // a single ** should be valid, for globs
    // Should accept anything
    let anything = Pattern::new("**").unwrap();
    assert!(anything.is_recursive);
    for &path in ["abcde", "", ".asdf", "/x/.asdf"].iter() {
        assert!(anything.matches(path));
    }

    // collapse consecutive wildcards: the same paths match when `**` repeats
    let doubled = Pattern::new("some/**/**/needle.txt").unwrap();
    for &path in needle_paths.iter() {
        assert!(doubled.matches(path));
    }
    assert!(!doubled.matches("some/other/notthis.txt"));

    // ** can begin the pattern
    let leading = Pattern::new("**/test").unwrap();
    for &path in ["one/two/test", "one/test", "test"].iter() {
        assert!(leading.matches(path));
    }

    // /** can begin the pattern
    let rooted = Pattern::new("/**/test").unwrap();
    for &path in ["/one/two/test", "/one/test", "/test"].iter() {
        assert!(rooted.matches(path));
    }
    for &path in ["/one/notthis", "/notthis"].iter() {
        assert!(!rooted.matches(path));
    }

    // Only start sub-patterns on start of path segment.
    let dotfiles = Pattern::new("**/.*").unwrap();
    for &path in [".abc", "abc/.abc"].iter() {
        assert!(dotfiles.matches(path));
    }
    for &path in ["ab.c", "abc/ab.c"].iter() {
        assert!(!dotfiles.matches(path));
    }
}
1090
// Smoke test: iterates over entries four levels below the filesystem root,
// which touches lots of differently named files. The yielded value is
// discarded, so this only fails if `glob` rejects the pattern or if
// iteration itself panics.
#[test]
fn test_lots_of_files() {
    // `nth(10000)` advances past the same 10000 items that
    // `skip(10000).next()` did, without building an extra adaptor.
    let _ = glob("/*/*/*/*").unwrap().nth(10000);
}
1096
// Bracket expressions: ranges, negation, mixed members, literal `-`, and a
// reversed range.
#[test]
fn test_range_pattern() {
    let digit = Pattern::new("a[0-9]b").unwrap();
    assert!((0..10).all(|i| digit.matches(&format!("a{}b", i))));
    assert!(!digit.matches("a_b"));

    let non_digit = Pattern::new("a[!0-9]b").unwrap();
    assert!(!(0..10).any(|i| non_digit.matches(&format!("a{}b", i))));
    assert!(non_digit.matches("a_b"));

    // The same options are used for every upper-case probe, so build them once.
    let ignore_case = MatchOptions { case_sensitive: false, ..MatchOptions::new() };
    for &class in ["[a-z123]", "[1a-z23]", "[123a-z]"].iter() {
        let pat = Pattern::new(class).unwrap();

        let mut lower = "abcdefghijklmnopqrstuvwxyz".chars();
        assert!(lower.all(|c| pat.matches(&c.to_string())));

        let mut upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
        assert!(upper.all(|c| pat.matches_with(&c.to_string(), &ignore_case)));

        for &member in ["1", "2", "3"].iter() {
            assert!(pat.matches(member));
        }
    }

    // A `-` at either edge of the class is matched as a literal character.
    for &class in ["[abc-]", "[-abc]", "[a-c-]"].iter() {
        let pat = Pattern::new(class).unwrap();
        for &member in ["a", "b", "c", "-"].iter() {
            assert!(pat.matches(member));
        }
        assert!(!pat.matches("d"));
    }

    // A reversed range parses but matches neither endpoint.
    let reversed = Pattern::new("[2-1]").unwrap();
    for &endpoint in ["1", "2"].iter() {
        assert!(!reversed.matches(endpoint));
    }

    let only_dash = Pattern::new("[-]").unwrap();
    assert!(only_dash.matches("-"));
    let not_dash = Pattern::new("[!-]").unwrap();
    assert!(!not_dash.matches("-"));
}
1144
// A leading `*` followed by a literal tail, checked through `matches`
// against paths using both `/` and `\` separators.
#[test]
fn test_pattern_matches() {
    let txt_pat = Pattern::new("*hello.txt").unwrap();
    let txt_hits = [
        "hello.txt",
        "gareth_says_hello.txt",
        "some/path/to/hello.txt",
        "some\\path\\to\\hello.txt",
        "/an/absolute/path/to/hello.txt",
    ];
    for &path in txt_hits.iter() {
        assert!(txt_pat.matches(path));
    }
    for &path in ["hello.txt-and-then-some", "goodbye.txt"].iter() {
        assert!(!txt_pat.matches(path));
    }

    let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap();
    let dir_hits = ["some/path/to/hello.txt", "a/bigger/some/path/to/hello.txt"];
    for &path in dir_hits.iter() {
        assert!(dir_pat.matches(path));
    }
    let dir_misses = [
        "some/path/to/hello.txt-and-then-some",
        "some/other/path/to/hello.txt",
    ];
    for &path in dir_misses.iter() {
        assert!(!dir_pat.matches(path));
    }
}
1162
// `Pattern::escape` must bracket the metacharacters `[`, `]`, `?` and `*`
// (leaving `!` and `_` alone), and the escaped text, compiled as a pattern,
// must match the original string.
#[test]
fn test_pattern_escape() {
    let raw = "_[_]_?_*_!_";
    let escaped = Pattern::escape(raw);
    assert_eq!(escaped, "_[[]_[]]_[?]_[*]_!_");

    let literal = Pattern::new(&escaped).unwrap();
    assert!(literal.matches(raw));
}
1169
// With `case_sensitive` switched off, a mixed-case literal pattern must match
// every casing of the same letters tried below.
#[test]
fn test_pattern_matches_case_insensitive() {
    let ignore_case = MatchOptions {
        case_sensitive: false,
        require_literal_separator: false,
        require_literal_leading_dot: false,
    };
    let mixed = Pattern::new("aBcDeFg").unwrap();

    for &spelling in ["aBcDeFg", "abcdefg", "ABCDEFG", "AbCdEfG"].iter() {
        assert!(mixed.matches_with(spelling, &ignore_case));
    }
}
1185
// Case sensitivity must also apply inside bracket classes, both plain
// (`[a]`) and negated (`[!a]`).
#[test]
fn test_pattern_matches_case_insensitive_range() {
    // Builds options that differ only in their case sensitivity.
    fn with_case(sensitive: bool) -> MatchOptions {
        MatchOptions {
            case_sensitive: sensitive,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        }
    }
    let any_case = with_case(false);
    let exact_case = with_case(true);

    let within = Pattern::new("[a]").unwrap();
    let except = Pattern::new("[!a]").unwrap();

    assert!(within.matches_with("a", &any_case));
    assert!(within.matches_with("A", &any_case));
    assert!(!within.matches_with("A", &exact_case));

    assert!(!except.matches_with("a", &any_case));
    assert!(!except.matches_with("A", &any_case));
    assert!(except.matches_with("A", &exact_case));
}
1211
// With `require_literal_separator` set, only a literal `/` in the pattern may
// match the `/` in `abc/def`; without it, `?`, `*` and `[/]` match it too.
#[test]
fn test_pattern_matches_require_literal_separator() {
    let strict = MatchOptions {
        case_sensitive: true,
        require_literal_separator: true,
        require_literal_leading_dot: false,
    };
    let relaxed = MatchOptions {
        case_sensitive: true,
        require_literal_separator: false,
        require_literal_leading_dot: false,
    };

    // Compiles `pattern` and matches it against the fixed path `abc/def`.
    let matches = |pattern: &str, options: &MatchOptions| {
        Pattern::new(pattern).unwrap().matches_with("abc/def", options)
    };

    assert!(matches("abc/def", &strict));
    for &pattern in ["abc?def", "abc*def", "abc[/]def"].iter() {
        assert!(!matches(pattern, &strict));
    }

    for &pattern in ["abc/def", "abc?def", "abc*def", "abc[/]def"].iter() {
        assert!(matches(pattern, &relaxed));
    }
}
1250
// With `require_literal_leading_dot` set, a `.` at the start of a path
// component is only matched by a literal `.` in the pattern; wildcards and
// bracket classes must not match it. Without the option every case matches.
#[test]
fn test_pattern_matches_require_literal_leading_dot() {
    let strict = MatchOptions {
        case_sensitive: true,
        require_literal_separator: false,
        require_literal_leading_dot: true,
    };
    let relaxed = MatchOptions {
        case_sensitive: true,
        require_literal_separator: false,
        require_literal_leading_dot: false,
    };

    // (pattern, path, whether it should still match under `strict`)
    let cases = [
        ("*.txt", ".hello.txt", false),
        (".*.*", ".hello.txt", true),
        ("aaa/bbb/*", "aaa/bbb/.ccc", false),
        ("aaa/bbb/*", "aaa/bbb/c.c.c.", true),
        ("aaa/bbb/.*", "aaa/bbb/.ccc", true),
        ("aaa/?bbb", "aaa/.bbb", false),
        ("aaa/[.]bbb", "aaa/.bbb", false),
        ("**/*", ".bbb", false),
    ];

    for &(pattern, path, strict_expected) in cases.iter() {
        let compiled = Pattern::new(pattern).unwrap();
        assert!(compiled.matches_with(path, &relaxed));
        assert!(compiled.matches_with(path, &strict) == strict_expected);
    }
}
1299
// `matches_path` must accept a `Path` and match it against the pattern.
//
// The stated intent of this test is that `/` and `\` are considered
// equivalent on Windows.
// NOTE(review): the earlier comment justified this with
// `Path::new("a/b").as_str()` yielding "a\\b"; `as_str` is not a method of
// `std::path::Path`, so confirm whether the path text is still rewritten on
// Windows or whether this now only checks the plain `/` case.
#[test]
fn test_matches_path() {
    // `Path::new` already returns `&Path`, so no extra borrow is needed.
    assert!(Pattern::new("a/b").unwrap().matches_path(Path::new("a/b")));
}
1306
// A pattern assembled by appending `**/*.rs` to the path `one` must still
// compile once turned back into a string.
#[test]
fn test_path_join() {
    let mut joined = Path::new("one").to_path_buf();
    joined.push(Path::new("**/*.rs"));

    let text = joined.to_str().unwrap();
    assert!(Pattern::new(text).is_ok());
}
1312 }
1313