1 use std::fmt;
2 use std::hash;
3 use std::iter;
4 use std::ops::{Deref, DerefMut};
5 use std::path::{is_separator, Path};
6 use std::str;
7 
8 use regex;
9 use regex::bytes::Regex;
10 
11 use crate::{new_regex, Candidate, Error, ErrorKind};
12 
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
53 
54 impl MatchStrategy {
55     /// Returns a matching strategy for the given pattern.
new(pat: &Glob) -> MatchStrategy56     pub fn new(pat: &Glob) -> MatchStrategy {
57         if let Some(lit) = pat.basename_literal() {
58             MatchStrategy::BasenameLiteral(lit)
59         } else if let Some(lit) = pat.literal() {
60             MatchStrategy::Literal(lit)
61         } else if let Some(ext) = pat.ext() {
62             MatchStrategy::Extension(ext)
63         } else if let Some(prefix) = pat.prefix() {
64             MatchStrategy::Prefix(prefix)
65         } else if let Some((suffix, component)) = pat.suffix() {
66             MatchStrategy::Suffix { suffix: suffix, component: component }
67         } else if let Some(ext) = pat.required_ext() {
68             MatchStrategy::RequiredExtension(ext)
69         } else {
70             MatchStrategy::Regex
71         }
72     }
73 }
74 
/// Glob represents a successfully parsed shell glob pattern.
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
///
/// Equality and hashing are implemented by hand and only consider the
/// original pattern text and the options it was built with.
#[derive(Clone, Debug, Eq)]
pub struct Glob {
    /// The original glob pattern text, exactly as given to the builder.
    glob: String,
    /// The regular expression string generated from `tokens` and `opts`.
    re: String,
    /// The options this pattern was built with.
    opts: GlobOptions,
    /// The parsed form of `glob`.
    tokens: Tokens,
}
86 
87 impl PartialEq for Glob {
eq(&self, other: &Glob) -> bool88     fn eq(&self, other: &Glob) -> bool {
89         self.glob == other.glob && self.opts == other.opts
90     }
91 }
92 
93 impl hash::Hash for Glob {
hash<H: hash::Hasher>(&self, state: &mut H)94     fn hash<H: hash::Hasher>(&self, state: &mut H) {
95         self.glob.hash(state);
96         self.opts.hash(state);
97     }
98 }
99 
100 impl fmt::Display for Glob {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result101     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102         self.glob.fmt(f)
103     }
104 }
105 
106 impl str::FromStr for Glob {
107     type Err = Error;
108 
from_str(glob: &str) -> Result<Self, Self::Err>109     fn from_str(glob: &str) -> Result<Self, Self::Err> {
110         Self::new(glob)
111     }
112 }
113 
/// A matcher for a single pattern.
///
/// A matcher pairs a `Glob` with the regex compiled from it and answers
/// match queries by running that regex over the candidate path's bytes.
#[derive(Clone, Debug)]
pub struct GlobMatcher {
    /// The underlying pattern.
    pat: Glob,
    /// The pattern, as a compiled regex.
    re: Regex,
}
122 
123 impl GlobMatcher {
124     /// Tests whether the given path matches this pattern or not.
is_match<P: AsRef<Path>>(&self, path: P) -> bool125     pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
126         self.is_match_candidate(&Candidate::new(path.as_ref()))
127     }
128 
129     /// Tests whether the given path matches this pattern or not.
is_match_candidate(&self, path: &Candidate<'_>) -> bool130     pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
131         self.re.is_match(&path.path)
132     }
133 
134     /// Returns the `Glob` used to compile this matcher.
glob(&self) -> &Glob135     pub fn glob(&self) -> &Glob {
136         &self.pat
137     }
138 }
139 
/// A strategic matcher for a single pattern.
///
/// Unlike `GlobMatcher`, this first consults a `MatchStrategy` and only falls
/// back to the compiled regex when the strategy requires it. It is only
/// compiled for tests.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The match strategy to use.
    strategy: MatchStrategy,
    /// The underlying pattern.
    pat: Glob,
    /// The pattern, as a compiled regex.
    re: Regex,
}
151 
#[cfg(test)]
impl GlobStrategic {
    /// Returns true if `path` matches this pattern.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Returns true if `candidate` matches this pattern, deciding via the
    /// precomputed strategy and only running the regex when the strategy
    /// calls for it.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let haystack = &*candidate.path;

        match &self.strategy {
            MatchStrategy::Literal(lit) => lit.as_bytes() == haystack,
            MatchStrategy::BasenameLiteral(lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(pre) => {
                starts_with(pre.as_bytes(), haystack)
            }
            MatchStrategy::Suffix { suffix, component } => {
                let needle = suffix.as_bytes();
                // A component suffix starts with `/`; the path may also be
                // exactly the suffix without that leading separator.
                (*component && haystack == &needle[1..])
                    || ends_with(needle, haystack)
            }
            MatchStrategy::RequiredExtension(ext) => {
                // The extension is only a prerequisite; the regex decides.
                &*candidate.ext == ext.as_bytes()
                    && self.re.is_match(haystack)
            }
            MatchStrategy::Regex => self.re.is_match(haystack),
        }
    }
}
188 
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The pattern is only parsed when `build` is called; the setters merely
/// record options.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
202 
/// The options that control how a glob pattern is parsed and translated
/// into a regular expression.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
}

impl Default for GlobOptions {
    /// Returns the default options: case sensitive matching, wildcards may
    /// match `/`, and backslash escapes enabled exactly when `\` is not a
    /// path separator on the current platform.
    ///
    /// This is provided through the standard `Default` trait (rather than an
    /// inherent method of the same name) so that `GlobOptions::default()`,
    /// `Default::default()` and struct-update syntax all work.
    fn default() -> GlobOptions {
        GlobOptions {
            case_insensitive: false,
            literal_separator: false,
            // `\` cannot be both an escape character and a path separator.
            backslash_escape: !is_separator('\\'),
        }
    }
}
224 
/// An ordered sequence of parsed glob tokens.
///
/// This is a newtype over `Vec<Token>`; the `Deref`/`DerefMut` impls expose
/// the vector's methods directly.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);
227 
228 impl Deref for Tokens {
229     type Target = Vec<Token>;
deref(&self) -> &Vec<Token>230     fn deref(&self) -> &Vec<Token> {
231         &self.0
232     }
233 }
234 
235 impl DerefMut for Tokens {
deref_mut(&mut self) -> &mut Vec<Token>236     fn deref_mut(&mut self) -> &mut Vec<Token> {
237         &mut self.0
238     }
239 }
240 
/// A single parsed element of a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A single character to match; emitted into the regex as an escaped
    /// literal.
    Literal(char),
    /// Produced by `?`. Becomes `.` in the regex, or `[^/]` when the
    /// `literal_separator` option is set.
    Any,
    /// Produced by a `*` that is not followed by another `*`. Becomes `.*`
    /// in the regex, or `[^/]*` when the `literal_separator` option is set.
    ZeroOrMore,
    /// A recursive wildcard at the start of a pattern (e.g. the `**/` in
    /// `**/foo`). Becomes `(?:/?|.*/)` in the regex; a pattern consisting
    /// solely of this token becomes `.*`.
    RecursivePrefix,
    /// Becomes `/.*` in the regex.
    // NOTE(review): presumably produced by a trailing `/**`; the parsing of
    // `**` is outside this view -- confirm.
    RecursiveSuffix,
    /// Becomes `(?:/|/.*/)` in the regex.
    // NOTE(review): presumably produced by `/**/` in the middle of a
    // pattern; the parsing of `**` is outside this view -- confirm.
    RecursiveZeroOrMore,
    /// A character class. Each entry of `ranges` is a `(start, end)` pair
    /// emitted as `start-end` (or as a single character when both ends are
    /// equal); `negated` adds a leading `^` to the emitted class.
    Class { negated: bool, ranges: Vec<(char, char)> },
    /// A group of alternatives, pushed when `}` is parsed. Emitted as
    /// `(a|b|...)`; alternatives that produce an empty regex are skipped, and
    /// nothing is emitted if none remain.
    Alternates(Vec<Tokens>),
}
252 
253 impl Glob {
254     /// Builds a new pattern with default options.
new(glob: &str) -> Result<Glob, Error>255     pub fn new(glob: &str) -> Result<Glob, Error> {
256         GlobBuilder::new(glob).build()
257     }
258 
259     /// Returns a matcher for this pattern.
compile_matcher(&self) -> GlobMatcher260     pub fn compile_matcher(&self) -> GlobMatcher {
261         let re =
262             new_regex(&self.re).expect("regex compilation shouldn't fail");
263         GlobMatcher { pat: self.clone(), re: re }
264     }
265 
266     /// Returns a strategic matcher.
267     ///
268     /// This isn't exposed because it's not clear whether it's actually
269     /// faster than just running a regex for a *single* pattern. If it
270     /// is faster, then GlobMatcher should do it automatically.
271     #[cfg(test)]
compile_strategic_matcher(&self) -> GlobStrategic272     fn compile_strategic_matcher(&self) -> GlobStrategic {
273         let strategy = MatchStrategy::new(self);
274         let re =
275             new_regex(&self.re).expect("regex compilation shouldn't fail");
276         GlobStrategic { strategy: strategy, pat: self.clone(), re: re }
277     }
278 
279     /// Returns the original glob pattern used to build this pattern.
glob(&self) -> &str280     pub fn glob(&self) -> &str {
281         &self.glob
282     }
283 
284     /// Returns the regular expression string for this glob.
285     ///
286     /// Note that regular expressions for globs are intended to be matched on
287     /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
288     /// particular, globs are frequently used on file paths, where there is no
289     /// general guarantee that file paths are themselves valid UTF-8. As a
290     /// result, callers will need to ensure that they are using a regex API
291     /// that can match on arbitrary bytes. For example, the
292     /// [`regex`](https://crates.io/regex)
293     /// crate's
294     /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
295     /// API is not suitable for this since it matches on `&str`, but its
296     /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
297     /// API is suitable for this.
regex(&self) -> &str298     pub fn regex(&self) -> &str {
299         &self.re
300     }
301 
302     /// Returns the pattern as a literal if and only if the pattern must match
303     /// an entire path exactly.
304     ///
305     /// The basic format of these patterns is `{literal}`.
literal(&self) -> Option<String>306     fn literal(&self) -> Option<String> {
307         if self.opts.case_insensitive {
308             return None;
309         }
310         let mut lit = String::new();
311         for t in &*self.tokens {
312             match *t {
313                 Token::Literal(c) => lit.push(c),
314                 _ => return None,
315             }
316         }
317         if lit.is_empty() {
318             None
319         } else {
320             Some(lit)
321         }
322     }
323 
324     /// Returns an extension if this pattern matches a file path if and only
325     /// if the file path has the extension returned.
326     ///
327     /// Note that this extension returned differs from the extension that
328     /// std::path::Path::extension returns. Namely, this extension includes
329     /// the '.'. Also, paths like `.rs` are considered to have an extension
330     /// of `.rs`.
ext(&self) -> Option<String>331     fn ext(&self) -> Option<String> {
332         if self.opts.case_insensitive {
333             return None;
334         }
335         let start = match self.tokens.get(0) {
336             Some(&Token::RecursivePrefix) => 1,
337             Some(_) => 0,
338             _ => return None,
339         };
340         match self.tokens.get(start) {
341             Some(&Token::ZeroOrMore) => {
342                 // If there was no recursive prefix, then we only permit
343                 // `*` if `*` can match a `/`. For example, if `*` can't
344                 // match `/`, then `*.c` doesn't match `foo/bar.c`.
345                 if start == 0 && self.opts.literal_separator {
346                     return None;
347                 }
348             }
349             _ => return None,
350         }
351         match self.tokens.get(start + 1) {
352             Some(&Token::Literal('.')) => {}
353             _ => return None,
354         }
355         let mut lit = ".".to_string();
356         for t in self.tokens[start + 2..].iter() {
357             match *t {
358                 Token::Literal('.') | Token::Literal('/') => return None,
359                 Token::Literal(c) => lit.push(c),
360                 _ => return None,
361             }
362         }
363         if lit.is_empty() {
364             None
365         } else {
366             Some(lit)
367         }
368     }
369 
370     /// This is like `ext`, but returns an extension even if it isn't sufficient
371     /// to imply a match. Namely, if an extension is returned, then it is
372     /// necessary but not sufficient for a match.
required_ext(&self) -> Option<String>373     fn required_ext(&self) -> Option<String> {
374         if self.opts.case_insensitive {
375             return None;
376         }
377         // We don't care at all about the beginning of this pattern. All we
378         // need to check for is if it ends with a literal of the form `.ext`.
379         let mut ext: Vec<char> = vec![]; // built in reverse
380         for t in self.tokens.iter().rev() {
381             match *t {
382                 Token::Literal('/') => return None,
383                 Token::Literal(c) => {
384                     ext.push(c);
385                     if c == '.' {
386                         break;
387                     }
388                 }
389                 _ => return None,
390             }
391         }
392         if ext.last() != Some(&'.') {
393             None
394         } else {
395             ext.reverse();
396             Some(ext.into_iter().collect())
397         }
398     }
399 
400     /// Returns a literal prefix of this pattern if the entire pattern matches
401     /// if the literal prefix matches.
prefix(&self) -> Option<String>402     fn prefix(&self) -> Option<String> {
403         if self.opts.case_insensitive {
404             return None;
405         }
406         let (end, need_sep) = match self.tokens.last() {
407             Some(&Token::ZeroOrMore) => {
408                 if self.opts.literal_separator {
409                     // If a trailing `*` can't match a `/`, then we can't
410                     // assume a match of the prefix corresponds to a match
411                     // of the overall pattern. e.g., `foo/*` with
412                     // `literal_separator` enabled matches `foo/bar` but not
413                     // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
414                     // literal prefix.
415                     return None;
416                 }
417                 (self.tokens.len() - 1, false)
418             }
419             Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
420             _ => (self.tokens.len(), false),
421         };
422         let mut lit = String::new();
423         for t in &self.tokens[0..end] {
424             match *t {
425                 Token::Literal(c) => lit.push(c),
426                 _ => return None,
427             }
428         }
429         if need_sep {
430             lit.push('/');
431         }
432         if lit.is_empty() {
433             None
434         } else {
435             Some(lit)
436         }
437     }
438 
439     /// Returns a literal suffix of this pattern if the entire pattern matches
440     /// if the literal suffix matches.
441     ///
442     /// If a literal suffix is returned and it must match either the entire
443     /// file path or be preceded by a `/`, then also return true. This happens
444     /// with a pattern like `**/foo/bar`. Namely, this pattern matches
445     /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
446     /// suffix returned is `/foo/bar` (but should match the entire path
447     /// `foo/bar`).
448     ///
449     /// When this returns true, the suffix literal is guaranteed to start with
450     /// a `/`.
suffix(&self) -> Option<(String, bool)>451     fn suffix(&self) -> Option<(String, bool)> {
452         if self.opts.case_insensitive {
453             return None;
454         }
455         let mut lit = String::new();
456         let (start, entire) = match self.tokens.get(0) {
457             Some(&Token::RecursivePrefix) => {
458                 // We only care if this follows a path component if the next
459                 // token is a literal.
460                 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
461                     lit.push('/');
462                     (1, true)
463                 } else {
464                     (1, false)
465                 }
466             }
467             _ => (0, false),
468         };
469         let start = match self.tokens.get(start) {
470             Some(&Token::ZeroOrMore) => {
471                 // If literal_separator is enabled, then a `*` can't
472                 // necessarily match everything, so reporting a suffix match
473                 // as a match of the pattern would be a false positive.
474                 if self.opts.literal_separator {
475                     return None;
476                 }
477                 start + 1
478             }
479             _ => start,
480         };
481         for t in &self.tokens[start..] {
482             match *t {
483                 Token::Literal(c) => lit.push(c),
484                 _ => return None,
485             }
486         }
487         if lit.is_empty() || lit == "/" {
488             None
489         } else {
490             Some((lit, entire))
491         }
492     }
493 
494     /// If this pattern only needs to inspect the basename of a file path,
495     /// then the tokens corresponding to only the basename match are returned.
496     ///
497     /// For example, given a pattern of `**/*.foo`, only the tokens
498     /// corresponding to `*.foo` are returned.
499     ///
500     /// Note that this will return None if any match of the basename tokens
501     /// doesn't correspond to a match of the entire pattern. For example, the
502     /// glob `foo` only matches when a file path has a basename of `foo`, but
503     /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
504     /// `foo` doesn't match `abc/foo`.
basename_tokens(&self) -> Option<&[Token]>505     fn basename_tokens(&self) -> Option<&[Token]> {
506         if self.opts.case_insensitive {
507             return None;
508         }
509         let start = match self.tokens.get(0) {
510             Some(&Token::RecursivePrefix) => 1,
511             _ => {
512                 // With nothing to gobble up the parent portion of a path,
513                 // we can't assume that matching on only the basename is
514                 // correct.
515                 return None;
516             }
517         };
518         if self.tokens[start..].is_empty() {
519             return None;
520         }
521         for t in &self.tokens[start..] {
522             match *t {
523                 Token::Literal('/') => return None,
524                 Token::Literal(_) => {} // OK
525                 Token::Any | Token::ZeroOrMore => {
526                     if !self.opts.literal_separator {
527                         // In this case, `*` and `?` can match a path
528                         // separator, which means this could reach outside
529                         // the basename.
530                         return None;
531                     }
532                 }
533                 Token::RecursivePrefix
534                 | Token::RecursiveSuffix
535                 | Token::RecursiveZeroOrMore => {
536                     return None;
537                 }
538                 Token::Class { .. } | Token::Alternates(..) => {
539                     // We *could* be a little smarter here, but either one
540                     // of these is going to prevent our literal optimizations
541                     // anyway, so give up.
542                     return None;
543                 }
544             }
545         }
546         Some(&self.tokens[start..])
547     }
548 
549     /// Returns the pattern as a literal if and only if the pattern exclusively
550     /// matches the basename of a file path *and* is a literal.
551     ///
552     /// The basic format of these patterns is `**/{literal}`, where `{literal}`
553     /// does not contain a path separator.
basename_literal(&self) -> Option<String>554     fn basename_literal(&self) -> Option<String> {
555         let tokens = match self.basename_tokens() {
556             None => return None,
557             Some(tokens) => tokens,
558         };
559         let mut lit = String::new();
560         for t in tokens {
561             match *t {
562                 Token::Literal(c) => lit.push(c),
563                 _ => return None,
564             }
565         }
566         Some(lit)
567     }
568 }
569 
570 impl<'a> GlobBuilder<'a> {
571     /// Create a new builder for the pattern given.
572     ///
573     /// The pattern is not compiled until `build` is called.
new(glob: &'a str) -> GlobBuilder<'a>574     pub fn new(glob: &'a str) -> GlobBuilder<'a> {
575         GlobBuilder { glob: glob, opts: GlobOptions::default() }
576     }
577 
578     /// Parses and builds the pattern.
build(&self) -> Result<Glob, Error>579     pub fn build(&self) -> Result<Glob, Error> {
580         let mut p = Parser {
581             glob: &self.glob,
582             stack: vec![Tokens::default()],
583             chars: self.glob.chars().peekable(),
584             prev: None,
585             cur: None,
586             opts: &self.opts,
587         };
588         p.parse()?;
589         if p.stack.is_empty() {
590             Err(Error {
591                 glob: Some(self.glob.to_string()),
592                 kind: ErrorKind::UnopenedAlternates,
593             })
594         } else if p.stack.len() > 1 {
595             Err(Error {
596                 glob: Some(self.glob.to_string()),
597                 kind: ErrorKind::UnclosedAlternates,
598             })
599         } else {
600             let tokens = p.stack.pop().unwrap();
601             Ok(Glob {
602                 glob: self.glob.to_string(),
603                 re: tokens.to_regex_with(&self.opts),
604                 opts: self.opts,
605                 tokens: tokens,
606             })
607         }
608     }
609 
610     /// Toggle whether the pattern matches case insensitively or not.
611     ///
612     /// This is disabled by default.
case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a>613     pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
614         self.opts.case_insensitive = yes;
615         self
616     }
617 
618     /// Toggle whether a literal `/` is required to match a path separator.
619     ///
620     /// By default this is false: `*` and `?` will match `/`.
literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a>621     pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
622         self.opts.literal_separator = yes;
623         self
624     }
625 
626     /// When enabled, a back slash (`\`) may be used to escape
627     /// special characters in a glob pattern. Additionally, this will
628     /// prevent `\` from being interpreted as a path separator on all
629     /// platforms.
630     ///
631     /// This is enabled by default on platforms where `\` is not a
632     /// path separator and disabled by default on platforms where `\`
633     /// is a path separator.
backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a>634     pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
635         self.opts.backslash_escape = yes;
636         self
637     }
638 }
639 
640 impl Tokens {
641     /// Convert this pattern to a string that is guaranteed to be a valid
642     /// regular expression and will represent the matching semantics of this
643     /// glob pattern and the options given.
to_regex_with(&self, options: &GlobOptions) -> String644     fn to_regex_with(&self, options: &GlobOptions) -> String {
645         let mut re = String::new();
646         re.push_str("(?-u)");
647         if options.case_insensitive {
648             re.push_str("(?i)");
649         }
650         re.push('^');
651         // Special case. If the entire glob is just `**`, then it should match
652         // everything.
653         if self.len() == 1 && self[0] == Token::RecursivePrefix {
654             re.push_str(".*");
655             re.push('$');
656             return re;
657         }
658         self.tokens_to_regex(options, &self, &mut re);
659         re.push('$');
660         re
661     }
662 
tokens_to_regex( &self, options: &GlobOptions, tokens: &[Token], re: &mut String, )663     fn tokens_to_regex(
664         &self,
665         options: &GlobOptions,
666         tokens: &[Token],
667         re: &mut String,
668     ) {
669         for tok in tokens {
670             match *tok {
671                 Token::Literal(c) => {
672                     re.push_str(&char_to_escaped_literal(c));
673                 }
674                 Token::Any => {
675                     if options.literal_separator {
676                         re.push_str("[^/]");
677                     } else {
678                         re.push_str(".");
679                     }
680                 }
681                 Token::ZeroOrMore => {
682                     if options.literal_separator {
683                         re.push_str("[^/]*");
684                     } else {
685                         re.push_str(".*");
686                     }
687                 }
688                 Token::RecursivePrefix => {
689                     re.push_str("(?:/?|.*/)");
690                 }
691                 Token::RecursiveSuffix => {
692                     re.push_str("/.*");
693                 }
694                 Token::RecursiveZeroOrMore => {
695                     re.push_str("(?:/|/.*/)");
696                 }
697                 Token::Class { negated, ref ranges } => {
698                     re.push('[');
699                     if negated {
700                         re.push('^');
701                     }
702                     for r in ranges {
703                         if r.0 == r.1 {
704                             // Not strictly necessary, but nicer to look at.
705                             re.push_str(&char_to_escaped_literal(r.0));
706                         } else {
707                             re.push_str(&char_to_escaped_literal(r.0));
708                             re.push('-');
709                             re.push_str(&char_to_escaped_literal(r.1));
710                         }
711                     }
712                     re.push(']');
713                 }
714                 Token::Alternates(ref patterns) => {
715                     let mut parts = vec![];
716                     for pat in patterns {
717                         let mut altre = String::new();
718                         self.tokens_to_regex(options, &pat, &mut altre);
719                         if !altre.is_empty() {
720                             parts.push(altre);
721                         }
722                     }
723 
724                     // It is possible to have an empty set in which case the
725                     // resulting alternation '()' would be an error.
726                     if !parts.is_empty() {
727                         re.push('(');
728                         re.push_str(&parts.join("|"));
729                         re.push(')');
730                     }
731                 }
732             }
733         }
734     }
735 }
736 
737 /// Convert a Unicode scalar value to an escaped string suitable for use as
738 /// a literal in a non-Unicode regex.
char_to_escaped_literal(c: char) -> String739 fn char_to_escaped_literal(c: char) -> String {
740     bytes_to_escaped_literal(&c.to_string().into_bytes())
741 }
742 
743 /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
744 /// code units are converted to their escaped form.
bytes_to_escaped_literal(bs: &[u8]) -> String745 fn bytes_to_escaped_literal(bs: &[u8]) -> String {
746     let mut s = String::with_capacity(bs.len());
747     for &b in bs {
748         if b <= 0x7F {
749             s.push_str(&regex::escape(&(b as char).to_string()));
750         } else {
751             s.push_str(&format!("\\x{:02x}", b));
752         }
753     }
754     s
755 }
756 
/// The state of an in-progress parse of a glob pattern into tokens.
struct Parser<'a> {
    /// The complete glob pattern text; copied into any error produced.
    glob: &'a str,
    /// A stack of token sequences. Parsing starts with a single entry for
    /// the top-level pattern; `{` and, inside an alternation, `,` push a
    /// fresh entry, and `}` folds the extra entries into one `Alternates`
    /// token.
    stack: Vec<Tokens>,
    /// The characters of `glob` that have not been consumed yet.
    chars: iter::Peekable<str::Chars<'a>>,
    /// Starts out as `None`.
    // NOTE(review): presumably the character consumed before `cur`,
    // maintained by `bump` (outside this view) -- confirm.
    prev: Option<char>,
    /// Starts out as `None`.
    // NOTE(review): presumably the most recently consumed character,
    // maintained by `bump` (outside this view) -- confirm.
    cur: Option<char>,
    /// The options that govern parsing.
    opts: &'a GlobOptions,
}
765 
766 impl<'a> Parser<'a> {
error(&self, kind: ErrorKind) -> Error767     fn error(&self, kind: ErrorKind) -> Error {
768         Error { glob: Some(self.glob.to_string()), kind: kind }
769     }
770 
parse(&mut self) -> Result<(), Error>771     fn parse(&mut self) -> Result<(), Error> {
772         while let Some(c) = self.bump() {
773             match c {
774                 '?' => self.push_token(Token::Any)?,
775                 '*' => self.parse_star()?,
776                 '[' => self.parse_class()?,
777                 '{' => self.push_alternate()?,
778                 '}' => self.pop_alternate()?,
779                 ',' => self.parse_comma()?,
780                 '\\' => self.parse_backslash()?,
781                 c => self.push_token(Token::Literal(c))?,
782             }
783         }
784         Ok(())
785     }
786 
push_alternate(&mut self) -> Result<(), Error>787     fn push_alternate(&mut self) -> Result<(), Error> {
788         if self.stack.len() > 1 {
789             return Err(self.error(ErrorKind::NestedAlternates));
790         }
791         Ok(self.stack.push(Tokens::default()))
792     }
793 
pop_alternate(&mut self) -> Result<(), Error>794     fn pop_alternate(&mut self) -> Result<(), Error> {
795         let mut alts = vec![];
796         while self.stack.len() >= 2 {
797             alts.push(self.stack.pop().unwrap());
798         }
799         self.push_token(Token::Alternates(alts))
800     }
801 
push_token(&mut self, tok: Token) -> Result<(), Error>802     fn push_token(&mut self, tok: Token) -> Result<(), Error> {
803         if let Some(ref mut pat) = self.stack.last_mut() {
804             return Ok(pat.push(tok));
805         }
806         Err(self.error(ErrorKind::UnopenedAlternates))
807     }
808 
pop_token(&mut self) -> Result<Token, Error>809     fn pop_token(&mut self) -> Result<Token, Error> {
810         if let Some(ref mut pat) = self.stack.last_mut() {
811             return Ok(pat.pop().unwrap());
812         }
813         Err(self.error(ErrorKind::UnopenedAlternates))
814     }
815 
have_tokens(&self) -> Result<bool, Error>816     fn have_tokens(&self) -> Result<bool, Error> {
817         match self.stack.last() {
818             None => Err(self.error(ErrorKind::UnopenedAlternates)),
819             Some(ref pat) => Ok(!pat.is_empty()),
820         }
821     }
822 
parse_comma(&mut self) -> Result<(), Error>823     fn parse_comma(&mut self) -> Result<(), Error> {
824         // If we aren't inside a group alternation, then don't
825         // treat commas specially. Otherwise, we need to start
826         // a new alternate.
827         if self.stack.len() <= 1 {
828             self.push_token(Token::Literal(','))
829         } else {
830             Ok(self.stack.push(Tokens::default()))
831         }
832     }
833 
parse_backslash(&mut self) -> Result<(), Error>834     fn parse_backslash(&mut self) -> Result<(), Error> {
835         if self.opts.backslash_escape {
836             match self.bump() {
837                 None => Err(self.error(ErrorKind::DanglingEscape)),
838                 Some(c) => self.push_token(Token::Literal(c)),
839             }
840         } else if is_separator('\\') {
841             // Normalize all patterns to use / as a separator.
842             self.push_token(Token::Literal('/'))
843         } else {
844             self.push_token(Token::Literal('\\'))
845         }
846     }
847 
    /// Handles `*`, which has already been consumed by the caller.
    ///
    /// A single `*` becomes `ZeroOrMore`. A `**` becomes one of the
    /// recursive tokens (`RecursivePrefix`, `RecursiveSuffix`,
    /// `RecursiveZeroOrMore`) when it is delimited appropriately, and two
    /// plain `ZeroOrMore` tokens otherwise.
    fn parse_star(&mut self) -> Result<(), Error> {
        // The character that came before the first `*`.
        let prev = self.prev;
        if self.peek() != Some('*') {
            // Lone `*`.
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
        // Consume the second `*`.
        assert!(self.bump() == Some('*'));
        if !self.have_tokens()? {
            // `**` is the first thing in the current token sequence.
            if !self.peek().map_or(true, is_separator) {
                // Followed by something other than a separator: treat it as
                // two ordinary stars.
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
            } else {
                // Followed by a separator or the end of input. The bump
                // consumes the separator, if there is one.
                self.push_token(Token::RecursivePrefix)?;
                assert!(self.bump().map_or(true, is_separator));
            }
            return Ok(());
        }

        // Tokens already exist in this sequence. Unless `**` directly
        // follows a separator (or a `,`/`{` while more than one sequence is
        // on the stack), it is just two ordinary stars.
        if !prev.map(is_separator).unwrap_or(false) {
            if self.stack.len() <= 1
                || (prev != Some(',') && prev != Some('{'))
            {
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        }
        // Decide from what follows `**` whether it ends the pattern/branch
        // (suffix) or sits between two separators.
        let is_suffix = match self.peek() {
            None => {
                assert!(self.bump().is_none());
                true
            }
            // End of an alternation branch; the `,`/`}` is left for the
            // main loop to handle.
            Some(',') | Some('}') if self.stack.len() >= 2 => true,
            Some(c) if is_separator(c) => {
                // Consume the trailing separator.
                assert!(self.bump().map(is_separator).unwrap_or(false));
                false
            }
            _ => {
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        };
        // Replace the token that precedes `**`. A recursive prefix/suffix is
        // pushed back unchanged, so repeated `**/` segments collapse into
        // one token; any other token is discarded in favor of the new
        // recursive token.
        match self.pop_token()? {
            Token::RecursivePrefix => {
                self.push_token(Token::RecursivePrefix)?;
            }
            Token::RecursiveSuffix => {
                self.push_token(Token::RecursiveSuffix)?;
            }
            _ => {
                if is_suffix {
                    self.push_token(Token::RecursiveSuffix)?;
                } else {
                    self.push_token(Token::RecursiveZeroOrMore)?;
                }
            }
        }
        Ok(())
    }
908 
parse_class(&mut self) -> Result<(), Error>909     fn parse_class(&mut self) -> Result<(), Error> {
910         fn add_to_last_range(
911             glob: &str,
912             r: &mut (char, char),
913             add: char,
914         ) -> Result<(), Error> {
915             r.1 = add;
916             if r.1 < r.0 {
917                 Err(Error {
918                     glob: Some(glob.to_string()),
919                     kind: ErrorKind::InvalidRange(r.0, r.1),
920                 })
921             } else {
922                 Ok(())
923             }
924         }
925         let mut ranges = vec![];
926         let negated = match self.chars.peek() {
927             Some(&'!') | Some(&'^') => {
928                 let bump = self.bump();
929                 assert!(bump == Some('!') || bump == Some('^'));
930                 true
931             }
932             _ => false,
933         };
934         let mut first = true;
935         let mut in_range = false;
936         loop {
937             let c = match self.bump() {
938                 Some(c) => c,
939                 // The only way to successfully break this loop is to observe
940                 // a ']'.
941                 None => return Err(self.error(ErrorKind::UnclosedClass)),
942             };
943             match c {
944                 ']' => {
945                     if first {
946                         ranges.push((']', ']'));
947                     } else {
948                         break;
949                     }
950                 }
951                 '-' => {
952                     if first {
953                         ranges.push(('-', '-'));
954                     } else if in_range {
955                         // invariant: in_range is only set when there is
956                         // already at least one character seen.
957                         let r = ranges.last_mut().unwrap();
958                         add_to_last_range(&self.glob, r, '-')?;
959                         in_range = false;
960                     } else {
961                         assert!(!ranges.is_empty());
962                         in_range = true;
963                     }
964                 }
965                 c => {
966                     if in_range {
967                         // invariant: in_range is only set when there is
968                         // already at least one character seen.
969                         add_to_last_range(
970                             &self.glob,
971                             ranges.last_mut().unwrap(),
972                             c,
973                         )?;
974                     } else {
975                         ranges.push((c, c));
976                     }
977                     in_range = false;
978                 }
979             }
980             first = false;
981         }
982         if in_range {
983             // Means that the last character in the class was a '-', so add
984             // it as a literal.
985             ranges.push(('-', '-'));
986         }
987         self.push_token(Token::Class { negated: negated, ranges: ranges })
988     }
989 
bump(&mut self) -> Option<char>990     fn bump(&mut self) -> Option<char> {
991         self.prev = self.cur;
992         self.cur = self.chars.next();
993         self.cur
994     }
995 
peek(&mut self) -> Option<char>996     fn peek(&mut self) -> Option<char> {
997         self.chars.peek().map(|&ch| ch)
998     }
999 }
1000 
/// Returns true if and only if `haystack` begins with `needle`.
///
/// An empty `needle` is a prefix of every haystack.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1005 
/// Returns true if and only if `haystack` ends with `needle`.
///
/// An empty `needle` is a suffix of every haystack.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1013 
1014 #[cfg(test)]
1015 mod tests {
1016     use super::Token::*;
1017     use super::{Glob, GlobBuilder, Token};
1018     use crate::{ErrorKind, GlobSetBuilder};
1019 
    /// Optional builder settings for a test case. A `None` field leaves the
    /// corresponding `GlobBuilder` setting untouched.
    #[derive(Clone, Copy, Debug, Default)]
    struct Options {
        /// Passed to `GlobBuilder::case_insensitive` when set.
        casei: Option<bool>,
        /// Passed to `GlobBuilder::literal_separator` when set.
        litsep: Option<bool>,
        /// Passed to `GlobBuilder::backslash_escape` when set.
        bsesc: Option<bool>,
    }
1026 
1027     macro_rules! syntax {
1028         ($name:ident, $pat:expr, $tokens:expr) => {
1029             #[test]
1030             fn $name() {
1031                 let pat = Glob::new($pat).unwrap();
1032                 assert_eq!($tokens, pat.tokens.0);
1033             }
1034         };
1035     }
1036 
1037     macro_rules! syntaxerr {
1038         ($name:ident, $pat:expr, $err:expr) => {
1039             #[test]
1040             fn $name() {
1041                 let err = Glob::new($pat).unwrap_err();
1042                 assert_eq!(&$err, err.kind());
1043             }
1044         };
1045     }
1046 
1047     macro_rules! toregex {
1048         ($name:ident, $pat:expr, $re:expr) => {
1049             toregex!($name, $pat, $re, Options::default());
1050         };
1051         ($name:ident, $pat:expr, $re:expr, $options:expr) => {
1052             #[test]
1053             fn $name() {
1054                 let mut builder = GlobBuilder::new($pat);
1055                 if let Some(casei) = $options.casei {
1056                     builder.case_insensitive(casei);
1057                 }
1058                 if let Some(litsep) = $options.litsep {
1059                     builder.literal_separator(litsep);
1060                 }
1061                 if let Some(bsesc) = $options.bsesc {
1062                     builder.backslash_escape(bsesc);
1063                 }
1064                 let pat = builder.build().unwrap();
1065                 assert_eq!(format!("(?-u){}", $re), pat.regex());
1066             }
1067         };
1068     }
1069 
1070     macro_rules! matches {
1071         ($name:ident, $pat:expr, $path:expr) => {
1072             matches!($name, $pat, $path, Options::default());
1073         };
1074         ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1075             #[test]
1076             fn $name() {
1077                 let mut builder = GlobBuilder::new($pat);
1078                 if let Some(casei) = $options.casei {
1079                     builder.case_insensitive(casei);
1080                 }
1081                 if let Some(litsep) = $options.litsep {
1082                     builder.literal_separator(litsep);
1083                 }
1084                 if let Some(bsesc) = $options.bsesc {
1085                     builder.backslash_escape(bsesc);
1086                 }
1087                 let pat = builder.build().unwrap();
1088                 let matcher = pat.compile_matcher();
1089                 let strategic = pat.compile_strategic_matcher();
1090                 let set = GlobSetBuilder::new().add(pat).build().unwrap();
1091                 assert!(matcher.is_match($path));
1092                 assert!(strategic.is_match($path));
1093                 assert!(set.is_match($path));
1094             }
1095         };
1096     }
1097 
1098     macro_rules! nmatches {
1099         ($name:ident, $pat:expr, $path:expr) => {
1100             nmatches!($name, $pat, $path, Options::default());
1101         };
1102         ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1103             #[test]
1104             fn $name() {
1105                 let mut builder = GlobBuilder::new($pat);
1106                 if let Some(casei) = $options.casei {
1107                     builder.case_insensitive(casei);
1108                 }
1109                 if let Some(litsep) = $options.litsep {
1110                     builder.literal_separator(litsep);
1111                 }
1112                 if let Some(bsesc) = $options.bsesc {
1113                     builder.backslash_escape(bsesc);
1114                 }
1115                 let pat = builder.build().unwrap();
1116                 let matcher = pat.compile_matcher();
1117                 let strategic = pat.compile_strategic_matcher();
1118                 let set = GlobSetBuilder::new().add(pat).build().unwrap();
1119                 assert!(!matcher.is_match($path));
1120                 assert!(!strategic.is_match($path));
1121                 assert!(!set.is_match($path));
1122             }
1123         };
1124     }
1125 
s(string: &str) -> String1126     fn s(string: &str) -> String {
1127         string.to_string()
1128     }
1129 
class(s: char, e: char) -> Token1130     fn class(s: char, e: char) -> Token {
1131         Class { negated: false, ranges: vec![(s, e)] }
1132     }
1133 
classn(s: char, e: char) -> Token1134     fn classn(s: char, e: char) -> Token {
1135         Class { negated: true, ranges: vec![(s, e)] }
1136     }
1137 
rclass(ranges: &[(char, char)]) -> Token1138     fn rclass(ranges: &[(char, char)]) -> Token {
1139         Class { negated: false, ranges: ranges.to_vec() }
1140     }
1141 
rclassn(ranges: &[(char, char)]) -> Token1142     fn rclassn(ranges: &[(char, char)]) -> Token {
1143         Class { negated: true, ranges: ranges.to_vec() }
1144     }
1145 
    // Token-level parse tests: each case pins the exact token sequence
    // produced for a pattern.

    // Literals, `?` and single `*`.
    syntax!(literal1, "a", vec![Literal('a')]);
    syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
    syntax!(any1, "?", vec![Any]);
    syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
    syntax!(seq1, "*", vec![ZeroOrMore]);
    syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
    syntax!(
        seq3,
        "*a*b*",
        vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
    );
    // `**` next to separators becomes a recursive token.
    syntax!(rseq1, "**", vec![RecursivePrefix]);
    syntax!(rseq2, "**/", vec![RecursivePrefix]);
    syntax!(rseq3, "/**", vec![RecursiveSuffix]);
    syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
    syntax!(
        rseq5,
        "a/**/b",
        vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
    );
    // Character classes, including literal `]`/`-` in edge positions and
    // negation with `!` or `^`.
    syntax!(cls1, "[a]", vec![class('a', 'a')]);
    syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
    syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
    syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
    syntax!(cls5, "[-]", vec![class('-', '-')]);
    syntax!(cls6, "[]]", vec![class(']', ']')]);
    syntax!(cls7, "[*]", vec![class('*', '*')]);
    syntax!(cls8, "[!!]", vec![classn('!', '!')]);
    syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
    syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
    syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
    syntax!(
        cls12,
        "[-a-z-]",
        vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
    );
    syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
    syntax!(cls14, "[--z]", vec![class('-', 'z')]);
    syntax!(cls15, "[ --]", vec![class(' ', '-')]);
    syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
    syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1191 
    // Parse-error tests. A `]` directly after `[` or `[!` is a literal
    // member, so those classes are still unclosed. A range whose end sorts
    // before its start is rejected.
    syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
    syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
    syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1198 
    // Option presets used by the tests below. Each one sets exactly one
    // builder flag and leaves the others untouched.

    // Case-insensitive matching on.
    const CASEI: Options =
        Options { casei: Some(true), litsep: None, bsesc: None };
    // Literal-separator mode on.
    const SLASHLIT: Options =
        Options { casei: None, litsep: Some(true), bsesc: None };
    // Backslash escaping explicitly off.
    const NOBSESC: Options =
        Options { casei: None, litsep: None, bsesc: Some(false) };
    // Backslash escaping explicitly on.
    const BSESC: Options =
        Options { casei: None, litsep: None, bsesc: Some(true) };
1207 
    // Glob-to-regex translation tests. The expected strings omit the
    // `(?-u)` prefix, which the `toregex!` macro prepends.
    // NOTE(review): this case passes `&CASEI` while every other case passes
    // its options by value; both forms compile.
    toregex!(re_casei, "a", "(?i)^a$", &CASEI);

    toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
    toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);

    toregex!(re1, "a", "^a$");
    toregex!(re2, "?", "^.$");
    toregex!(re3, "*", "^.*$");
    toregex!(re4, "a?", "^a.$");
    toregex!(re5, "?a", "^.a$");
    toregex!(re6, "a*", "^a.*$");
    toregex!(re7, "*a", "^.*a$");
    toregex!(re8, "[*]", r"^[\*]$");
    toregex!(re9, "[+]", r"^[\+]$");
    toregex!(re10, "+", r"^\+$");
    toregex!(re11, "☃", r"^\xe2\x98\x83$");
    // Recursive `**` forms; repeated `**/` segments give the same regex as
    // a single one.
    toregex!(re12, "**", r"^.*$");
    toregex!(re13, "**/", r"^.*$");
    toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
    toregex!(re15, "**/**", r"^.*$");
    toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re17, "**/**/**", r"^.*$");
    toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re19, "a/**", r"^a/.*$");
    toregex!(re20, "a/**/**", r"^a/.*$");
    toregex!(re21, "a/**/**/**", r"^a/.*$");
    toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
    toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
    toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
    // `**` that is not delimited by separators is two ordinary stars.
    toregex!(re28, "a**", r"^a.*.*$");
    toregex!(re29, "**a", r"^.*.*a$");
    toregex!(re30, "a**b", r"^a.*.*b$");
    toregex!(re31, "***", r"^.*.*.*$");
    toregex!(re32, "/a**", r"^/a.*.*$");
    toregex!(re33, "/**a", r"^/.*.*a$");
    toregex!(re34, "/a**b", r"^/a.*.*b$");
1247 
    // Positive match tests: each pattern must match the given path through
    // every matcher the `matches!` macro builds.

    // Basic literals and `*`.
    matches!(match1, "a", "a");
    matches!(match2, "a*b", "a_b");
    matches!(match3, "a*b*c", "abc");
    matches!(match4, "a*b*c", "a_b_c");
    matches!(match5, "a*b*c", "a___b___c");
    matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
    matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
    matches!(match9, "*.rs", ".rs");
    matches!(match10, "☃", "☃");

    // Recursive `**` patterns.
    matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
    matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec5, "**", "abcde");
    matches!(matchrec6, "**", "");
    matches!(matchrec7, "**", ".asdf");
    matches!(matchrec8, "**", "/x/.asdf");
    matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
    matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec13, "**/test", "one/two/test");
    matches!(matchrec14, "**/test", "one/test");
    matches!(matchrec15, "**/test", "test");
    matches!(matchrec16, "/**/test", "/one/two/test");
    matches!(matchrec17, "/**/test", "/one/test");
    matches!(matchrec18, "/**/test", "/test");
    matches!(matchrec19, "**/.*", ".abc");
    matches!(matchrec20, "**/.*", "abc/.abc");
    matches!(matchrec21, "**/foo/bar", "foo/bar");
    matches!(matchrec22, ".*/**", ".abc/abc");
    matches!(matchrec23, "test/**", "test/");
    matches!(matchrec24, "test/**", "test/one");
    matches!(matchrec25, "test/**", "test/one/two");
    matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");

    // Character classes and ranges.
    matches!(matchrange1, "a[0-9]b", "a0b");
    matches!(matchrange2, "a[0-9]b", "a9b");
    matches!(matchrange3, "a[!0-9]b", "a_b");
    matches!(matchrange4, "[a-z123]", "1");
    matches!(matchrange5, "[1a-z23]", "1");
    matches!(matchrange6, "[123a-z]", "1");
    matches!(matchrange7, "[abc-]", "-");
    matches!(matchrange8, "[-abc]", "-");
    matches!(matchrange9, "[-a-c]", "b");
    matches!(matchrange10, "[a-c-]", "b");
    matches!(matchrange11, "[-]", "-");
    matches!(matchrange12, "a[^0-9]b", "a_b");

    // Without literal-separator mode, `*` also matches across separators.
    matches!(matchpat1, "*hello.txt", "hello.txt");
    matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
    matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
    matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
    matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
    matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
    matches!(
        matchpat7,
        "*some/path/to/hello.txt",
        "a/bigger/some/path/to/hello.txt"
    );

    // Special characters are matched literally when wrapped in a class.
    matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");

    // Case-insensitive matching.
    matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
    matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
    matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
    matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);

    // Commas and `{...}` alternations.
    matches!(matchalt1, "a,b", "a,b");
    matches!(matchalt2, ",", ",");
    matches!(matchalt3, "{a,b}", "a");
    matches!(matchalt4, "{a,b}", "b");
    matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
    matches!(matchalt6, "{**/src/**,foo}", "foo");
    matches!(matchalt7, "{[}],foo}", "}");
    matches!(matchalt8, "{foo}", "foo");
    matches!(matchalt9, "{}", "");
    matches!(matchalt10, "{,}", "");
    matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
    matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
    matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");

    // Literal-separator mode. Several expectations are platform-specific
    // and selected with `cfg`.
    matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
    #[cfg(unix)]
    nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
    nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
    matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
    #[cfg(unix)]
    nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);

    // Backslash handling with escaping explicitly on (BSESC), explicitly
    // off (NOBSESC), and at the builder default.
    matches!(matchbackslash1, "\\[", "[", BSESC);
    matches!(matchbackslash2, "\\?", "?", BSESC);
    matches!(matchbackslash3, "\\*", "*", BSESC);
    matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
    matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
    matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
    #[cfg(unix)]
    matches!(matchbackslash7, "\\a", "a");
    #[cfg(not(unix))]
    matches!(matchbackslash8, "\\a", "/a");
1354 
    // Negative match tests: each pattern must NOT match the given path
    // through any matcher the `nmatches!` macro builds.
    nmatches!(matchnot1, "a*b*c", "abcd");
    nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
    nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot5, "/**/test", "test");
    nmatches!(matchnot6, "/**/test", "/one/notthis");
    nmatches!(matchnot7, "/**/test", "/notthis");
    nmatches!(matchnot8, "**/.*", "ab.c");
    nmatches!(matchnot9, "**/.*", "abc/ab.c");
    nmatches!(matchnot10, ".*/**", "a.bc");
    nmatches!(matchnot11, ".*/**", "abc/a.bc");
    nmatches!(matchnot12, "a[0-9]b", "a_b");
    nmatches!(matchnot13, "a[!0-9]b", "a0b");
    nmatches!(matchnot14, "a[!0-9]b", "a9b");
    nmatches!(matchnot15, "[!-]", "-");
    nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
    nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
    nmatches!(
        matchnot18,
        "*some/path/to/hello.txt",
        "some/path/to/hello.txt-and-then-some"
    );
    nmatches!(
        matchnot19,
        "*some/path/to/hello.txt",
        "some/other/path/to/hello.txt"
    );
    nmatches!(matchnot20, "a", "foo/a");
    nmatches!(matchnot21, "./foo", "foo");
    nmatches!(matchnot22, "**/foo", "foofoo");
    nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
    nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
    nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
    nmatches!(
        matchnot26,
        "**/m4/ltoptions.m4",
        "csharp/src/packages/repositories.config",
        SLASHLIT
    );
    nmatches!(matchnot27, "a[^0-9]b", "a0b");
    nmatches!(matchnot28, "a[^0-9]b", "a9b");
    nmatches!(matchnot29, "[^-]", "-");
    nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
    // NOTE(review): the remaining cases are negative tests but are named
    // `matchrec31`..`matchrec34` rather than `matchnot*`; confirm whether
    // that naming is intentional.
    nmatches!(
        matchrec31,
        "some/*/needle.txt",
        "some/one/two/needle.txt",
        SLASHLIT
    );
    nmatches!(
        matchrec32,
        "some/*/needle.txt",
        "some/one/two/three/needle.txt",
        SLASHLIT
    );
    nmatches!(matchrec33, ".*/**", ".abc");
    nmatches!(matchrec34, "foo/**", "foo");
1412 
1413     macro_rules! extract {
1414         ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
1415             extract!($which, $name, $pat, $expect, Options::default());
1416         };
1417         ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
1418             #[test]
1419             fn $name() {
1420                 let mut builder = GlobBuilder::new($pat);
1421                 if let Some(casei) = $options.casei {
1422                     builder.case_insensitive(casei);
1423                 }
1424                 if let Some(litsep) = $options.litsep {
1425                     builder.literal_separator(litsep);
1426                 }
1427                 if let Some(bsesc) = $options.bsesc {
1428                     builder.backslash_escape(bsesc);
1429                 }
1430                 let pat = builder.build().unwrap();
1431                 assert_eq!($expect, pat.$which());
1432             }
1433         };
1434     }
1435 
    // Thin wrappers around `extract!`, one per extraction method. Each
    // forwards its arguments unchanged and fixes the method being tested.

    /// Tests `Glob::literal`.
    macro_rules! literal {
        ($($tt:tt)*) => { extract!(literal, $($tt)*); }
    }

    /// Tests `Glob::basename_tokens`.
    macro_rules! basetokens {
        ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
    }

    /// Tests `Glob::ext`.
    macro_rules! ext {
        ($($tt:tt)*) => { extract!(ext, $($tt)*); }
    }

    /// Tests `Glob::required_ext`.
    macro_rules! required_ext {
        ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
    }

    /// Tests `Glob::prefix`.
    macro_rules! prefix {
        ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
    }

    /// Tests `Glob::suffix`.
    macro_rules! suffix {
        ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
    }

    /// Tests `Glob::basename_literal`.
    macro_rules! baseliteral {
        ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
    }
1463 
    // Extraction tests: each case pins what one of the extraction methods
    // returns for a pattern, with `None` meaning nothing was extracted.

    // Whole-pattern literal.
    literal!(extract_lit1, "foo", Some(s("foo")));
    literal!(extract_lit2, "foo", None, CASEI);
    literal!(extract_lit3, "/foo", Some(s("/foo")));
    literal!(extract_lit4, "/foo/", Some(s("/foo/")));
    literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
    literal!(extract_lit6, "*.foo", None);
    literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
    literal!(extract_lit8, "**/foo/bar", None);

    // Basename tokens.
    basetokens!(
        extract_basetoks1,
        "**/foo",
        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
    );
    basetokens!(extract_basetoks2, "**/foo", None, CASEI);
    basetokens!(
        extract_basetoks3,
        "**/foo",
        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
        SLASHLIT
    );
    basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
    basetokens!(extract_basetoks5, "*foo", None);
    basetokens!(extract_basetoks6, "**/fo*o", None);
    basetokens!(
        extract_basetoks7,
        "**/fo*o",
        Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
        SLASHLIT
    );

    // Extension.
    ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
    ext!(extract_ext2, "**/*.rs.bak", None);
    ext!(extract_ext3, "*.rs", Some(s(".rs")));
    ext!(extract_ext4, "a*.rs", None);
    ext!(extract_ext5, "/*.c", None);
    ext!(extract_ext6, "*.c", None, SLASHLIT);
    ext!(extract_ext7, "*.c", Some(s(".c")));

    // Required extension.
    // NOTE(review): `extract_req_ext2` and `extract_req_ext3` use the same
    // pattern and expectation; one of them may have been meant to cover a
    // different input.
    required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
    required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
    required_ext!(extract_req_ext6, "./rs", None);
    required_ext!(extract_req_ext7, "foo", None);
    required_ext!(extract_req_ext8, ".foo/", None);
    required_ext!(extract_req_ext9, "foo/", None);

    // Literal prefix.
    prefix!(extract_prefix1, "/foo", Some(s("/foo")));
    prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
    prefix!(extract_prefix3, "**/foo", None);
    prefix!(extract_prefix4, "foo/**", Some(s("foo/")));

    // Literal suffix, paired with a boolean flag.
    suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
    suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
    suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
    suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
    suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
    suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
    suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));

    // Basename literal.
    baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
    baseliteral!(extract_baselit2, "foo", None);
    baseliteral!(extract_baselit3, "*foo", None);
    baseliteral!(extract_baselit4, "*/foo", None);
1530 }
1531