1 use std::fmt;
2 use std::hash;
3 use std::iter;
4 use std::ops::{Deref, DerefMut};
5 use std::path::{is_separator, Path};
6 use std::str;
7
8 use regex;
9 use regex::bytes::Regex;
10
11 use crate::{new_regex, Candidate, Error, ErrorKind};
12
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
53
54 impl MatchStrategy {
55 /// Returns a matching strategy for the given pattern.
new(pat: &Glob) -> MatchStrategy56 pub fn new(pat: &Glob) -> MatchStrategy {
57 if let Some(lit) = pat.basename_literal() {
58 MatchStrategy::BasenameLiteral(lit)
59 } else if let Some(lit) = pat.literal() {
60 MatchStrategy::Literal(lit)
61 } else if let Some(ext) = pat.ext() {
62 MatchStrategy::Extension(ext)
63 } else if let Some(prefix) = pat.prefix() {
64 MatchStrategy::Prefix(prefix)
65 } else if let Some((suffix, component)) = pat.suffix() {
66 MatchStrategy::Suffix { suffix: suffix, component: component }
67 } else if let Some(ext) = pat.required_ext() {
68 MatchStrategy::RequiredExtension(ext)
69 } else {
70 MatchStrategy::Regex
71 }
72 }
73 }
74
/// Glob represents a successfully parsed shell glob pattern.
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
///
/// Equality and hashing consider only the pattern text and the options; the
/// derived `re` and `tokens` fields are not compared.
#[derive(Clone, Debug, Eq)]
pub struct Glob {
    /// The pattern text this glob was built from.
    glob: String,
    /// The regular expression string generated from `tokens`.
    re: String,
    /// The options the pattern was built with.
    opts: GlobOptions,
    /// The parsed form of the pattern.
    tokens: Tokens,
}
86
87 impl PartialEq for Glob {
eq(&self, other: &Glob) -> bool88 fn eq(&self, other: &Glob) -> bool {
89 self.glob == other.glob && self.opts == other.opts
90 }
91 }
92
93 impl hash::Hash for Glob {
hash<H: hash::Hasher>(&self, state: &mut H)94 fn hash<H: hash::Hasher>(&self, state: &mut H) {
95 self.glob.hash(state);
96 self.opts.hash(state);
97 }
98 }
99
100 impl fmt::Display for Glob {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result101 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102 self.glob.fmt(f)
103 }
104 }
105
106 impl str::FromStr for Glob {
107 type Err = Error;
108
from_str(glob: &str) -> Result<Self, Self::Err>109 fn from_str(glob: &str) -> Result<Self, Self::Err> {
110 Self::new(glob)
111 }
112 }
113
/// A matcher for a single pattern.
///
/// Matching is done by running the compiled regex against the candidate's
/// path.
#[derive(Clone, Debug)]
pub struct GlobMatcher {
    /// The underlying pattern.
    pat: Glob,
    /// The pattern, as a compiled regex.
    re: Regex,
}
122
123 impl GlobMatcher {
124 /// Tests whether the given path matches this pattern or not.
is_match<P: AsRef<Path>>(&self, path: P) -> bool125 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
126 self.is_match_candidate(&Candidate::new(path.as_ref()))
127 }
128
129 /// Tests whether the given path matches this pattern or not.
is_match_candidate(&self, path: &Candidate<'_>) -> bool130 pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
131 self.re.is_match(&path.path)
132 }
133
134 /// Returns the `Glob` used to compile this matcher.
glob(&self) -> &Glob135 pub fn glob(&self) -> &Glob {
136 &self.pat
137 }
138 }
139
/// A strategic matcher for a single pattern.
///
/// Only compiled for tests. It decides matches using the `MatchStrategy`
/// chosen for the pattern, falling back to the regex where the strategy
/// requires it.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The match strategy to use.
    strategy: MatchStrategy,
    /// The underlying pattern.
    pat: Glob,
    /// The pattern, as a compiled regex.
    re: Regex,
}
151
#[cfg(test)]
impl GlobStrategic {
    /// Reports whether `path` is matched by this pattern.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Reports whether `candidate` is matched, using the strategy selected
    /// for the pattern instead of always running the regex.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let haystack = &*candidate.path;

        match &self.strategy {
            MatchStrategy::Literal(lit) => lit.as_bytes() == haystack,
            MatchStrategy::BasenameLiteral(lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(prefix) => {
                starts_with(prefix.as_bytes(), haystack)
            }
            MatchStrategy::Suffix { suffix, component } => {
                // A component suffix starts with `/`; without that leading
                // byte it is allowed to equal the entire path.
                let is_whole_path =
                    *component && haystack == &suffix.as_bytes()[1..];
                is_whole_path || ends_with(suffix.as_bytes(), haystack)
            }
            MatchStrategy::RequiredExtension(ext) => {
                // The extension is necessary but not sufficient, so the
                // regex still has the final word.
                let wanted = ext.as_bytes();
                &*candidate.ext == wanted && self.re.is_match(haystack)
            }
            MatchStrategy::Regex => self.re.is_match(haystack),
        }
    }
}
188
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// Nothing is parsed until `build` is called.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
202
/// The set of switches that control how a pattern is parsed and what regex
/// is generated for it.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
}
214
215 impl GlobOptions {
default() -> GlobOptions216 fn default() -> GlobOptions {
217 GlobOptions {
218 case_insensitive: false,
219 literal_separator: false,
220 backslash_escape: !is_separator('\\'),
221 }
222 }
223 }
224
/// A sequence of parsed glob tokens.
///
/// This is a thin wrapper around `Vec<Token>`; it dereferences to the inner
/// vector.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);
227
228 impl Deref for Tokens {
229 type Target = Vec<Token>;
deref(&self) -> &Vec<Token>230 fn deref(&self) -> &Vec<Token> {
231 &self.0
232 }
233 }
234
235 impl DerefMut for Tokens {
deref_mut(&mut self) -> &mut Vec<Token>236 fn deref_mut(&mut self) -> &mut Vec<Token> {
237 &mut self.0
238 }
239 }
240
/// A single element of a parsed glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A character that must appear literally. It is regex-escaped when the
    /// pattern is converted to a regex.
    Literal(char),
    /// Produced by `?`. Becomes `.` in the regex, or `[^/]` when
    /// `literal_separator` is enabled.
    Any,
    /// Produced by `*`. Becomes `.*` in the regex, or `[^/]*` when
    /// `literal_separator` is enabled.
    ZeroOrMore,
    /// Produced by a `**` that opens a token list and is followed by a path
    /// separator or the end of the pattern. Becomes `(?:/?|.*/)`.
    RecursivePrefix,
    /// Produced by a `**` that closes the pattern (or an alternate). Becomes
    /// `/.*`.
    RecursiveSuffix,
    /// Produced by a `**` that sits between two path separators. Becomes
    /// `(?:/|/.*/)`.
    RecursiveZeroOrMore,
    /// Produced by `[...]`. `negated` is set when the class opens with `!`
    /// or `^`. Each entry of `ranges` is an inclusive `(start, end)` pair; a
    /// single character `c` is stored as `(c, c)`.
    Class { negated: bool, ranges: Vec<(char, char)> },
    /// Produced by `{...}`. Holds one token list per alternative.
    Alternates(Vec<Tokens>),
}
252
253 impl Glob {
254 /// Builds a new pattern with default options.
new(glob: &str) -> Result<Glob, Error>255 pub fn new(glob: &str) -> Result<Glob, Error> {
256 GlobBuilder::new(glob).build()
257 }
258
259 /// Returns a matcher for this pattern.
compile_matcher(&self) -> GlobMatcher260 pub fn compile_matcher(&self) -> GlobMatcher {
261 let re =
262 new_regex(&self.re).expect("regex compilation shouldn't fail");
263 GlobMatcher { pat: self.clone(), re: re }
264 }
265
/// Builds a matcher that decides matches with a `MatchStrategy`.
///
/// This stays private to tests: for a *single* pattern it is unclear that
/// this beats simply running the regex. Should it turn out to be faster,
/// `GlobMatcher` ought to apply it automatically.
///
/// Panics if the generated regex fails to compile, which is not expected to
/// happen.
#[cfg(test)]
fn compile_strategic_matcher(&self) -> GlobStrategic {
    let strategy = MatchStrategy::new(self);
    let re = new_regex(&self.re).expect("regex compilation shouldn't fail");
    GlobStrategic { strategy, pat: self.clone(), re }
}
278
279 /// Returns the original glob pattern used to build this pattern.
glob(&self) -> &str280 pub fn glob(&self) -> &str {
281 &self.glob
282 }
283
284 /// Returns the regular expression string for this glob.
285 ///
286 /// Note that regular expressions for globs are intended to be matched on
287 /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
288 /// particular, globs are frequently used on file paths, where there is no
289 /// general guarantee that file paths are themselves valid UTF-8. As a
290 /// result, callers will need to ensure that they are using a regex API
291 /// that can match on arbitrary bytes. For example, the
292 /// [`regex`](https://crates.io/regex)
293 /// crate's
294 /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
295 /// API is not suitable for this since it matches on `&str`, but its
296 /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
297 /// API is suitable for this.
regex(&self) -> &str298 pub fn regex(&self) -> &str {
299 &self.re
300 }
301
302 /// Returns the pattern as a literal if and only if the pattern must match
303 /// an entire path exactly.
304 ///
305 /// The basic format of these patterns is `{literal}`.
literal(&self) -> Option<String>306 fn literal(&self) -> Option<String> {
307 if self.opts.case_insensitive {
308 return None;
309 }
310 let mut lit = String::new();
311 for t in &*self.tokens {
312 match *t {
313 Token::Literal(c) => lit.push(c),
314 _ => return None,
315 }
316 }
317 if lit.is_empty() {
318 None
319 } else {
320 Some(lit)
321 }
322 }
323
324 /// Returns an extension if this pattern matches a file path if and only
325 /// if the file path has the extension returned.
326 ///
327 /// Note that this extension returned differs from the extension that
328 /// std::path::Path::extension returns. Namely, this extension includes
329 /// the '.'. Also, paths like `.rs` are considered to have an extension
330 /// of `.rs`.
ext(&self) -> Option<String>331 fn ext(&self) -> Option<String> {
332 if self.opts.case_insensitive {
333 return None;
334 }
335 let start = match self.tokens.get(0) {
336 Some(&Token::RecursivePrefix) => 1,
337 Some(_) => 0,
338 _ => return None,
339 };
340 match self.tokens.get(start) {
341 Some(&Token::ZeroOrMore) => {
342 // If there was no recursive prefix, then we only permit
343 // `*` if `*` can match a `/`. For example, if `*` can't
344 // match `/`, then `*.c` doesn't match `foo/bar.c`.
345 if start == 0 && self.opts.literal_separator {
346 return None;
347 }
348 }
349 _ => return None,
350 }
351 match self.tokens.get(start + 1) {
352 Some(&Token::Literal('.')) => {}
353 _ => return None,
354 }
355 let mut lit = ".".to_string();
356 for t in self.tokens[start + 2..].iter() {
357 match *t {
358 Token::Literal('.') | Token::Literal('/') => return None,
359 Token::Literal(c) => lit.push(c),
360 _ => return None,
361 }
362 }
363 if lit.is_empty() {
364 None
365 } else {
366 Some(lit)
367 }
368 }
369
370 /// This is like `ext`, but returns an extension even if it isn't sufficient
371 /// to imply a match. Namely, if an extension is returned, then it is
372 /// necessary but not sufficient for a match.
required_ext(&self) -> Option<String>373 fn required_ext(&self) -> Option<String> {
374 if self.opts.case_insensitive {
375 return None;
376 }
377 // We don't care at all about the beginning of this pattern. All we
378 // need to check for is if it ends with a literal of the form `.ext`.
379 let mut ext: Vec<char> = vec![]; // built in reverse
380 for t in self.tokens.iter().rev() {
381 match *t {
382 Token::Literal('/') => return None,
383 Token::Literal(c) => {
384 ext.push(c);
385 if c == '.' {
386 break;
387 }
388 }
389 _ => return None,
390 }
391 }
392 if ext.last() != Some(&'.') {
393 None
394 } else {
395 ext.reverse();
396 Some(ext.into_iter().collect())
397 }
398 }
399
400 /// Returns a literal prefix of this pattern if the entire pattern matches
401 /// if the literal prefix matches.
prefix(&self) -> Option<String>402 fn prefix(&self) -> Option<String> {
403 if self.opts.case_insensitive {
404 return None;
405 }
406 let (end, need_sep) = match self.tokens.last() {
407 Some(&Token::ZeroOrMore) => {
408 if self.opts.literal_separator {
409 // If a trailing `*` can't match a `/`, then we can't
410 // assume a match of the prefix corresponds to a match
411 // of the overall pattern. e.g., `foo/*` with
412 // `literal_separator` enabled matches `foo/bar` but not
413 // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
414 // literal prefix.
415 return None;
416 }
417 (self.tokens.len() - 1, false)
418 }
419 Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
420 _ => (self.tokens.len(), false),
421 };
422 let mut lit = String::new();
423 for t in &self.tokens[0..end] {
424 match *t {
425 Token::Literal(c) => lit.push(c),
426 _ => return None,
427 }
428 }
429 if need_sep {
430 lit.push('/');
431 }
432 if lit.is_empty() {
433 None
434 } else {
435 Some(lit)
436 }
437 }
438
439 /// Returns a literal suffix of this pattern if the entire pattern matches
440 /// if the literal suffix matches.
441 ///
442 /// If a literal suffix is returned and it must match either the entire
443 /// file path or be preceded by a `/`, then also return true. This happens
444 /// with a pattern like `**/foo/bar`. Namely, this pattern matches
445 /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
446 /// suffix returned is `/foo/bar` (but should match the entire path
447 /// `foo/bar`).
448 ///
449 /// When this returns true, the suffix literal is guaranteed to start with
450 /// a `/`.
suffix(&self) -> Option<(String, bool)>451 fn suffix(&self) -> Option<(String, bool)> {
452 if self.opts.case_insensitive {
453 return None;
454 }
455 let mut lit = String::new();
456 let (start, entire) = match self.tokens.get(0) {
457 Some(&Token::RecursivePrefix) => {
458 // We only care if this follows a path component if the next
459 // token is a literal.
460 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
461 lit.push('/');
462 (1, true)
463 } else {
464 (1, false)
465 }
466 }
467 _ => (0, false),
468 };
469 let start = match self.tokens.get(start) {
470 Some(&Token::ZeroOrMore) => {
471 // If literal_separator is enabled, then a `*` can't
472 // necessarily match everything, so reporting a suffix match
473 // as a match of the pattern would be a false positive.
474 if self.opts.literal_separator {
475 return None;
476 }
477 start + 1
478 }
479 _ => start,
480 };
481 for t in &self.tokens[start..] {
482 match *t {
483 Token::Literal(c) => lit.push(c),
484 _ => return None,
485 }
486 }
487 if lit.is_empty() || lit == "/" {
488 None
489 } else {
490 Some((lit, entire))
491 }
492 }
493
494 /// If this pattern only needs to inspect the basename of a file path,
495 /// then the tokens corresponding to only the basename match are returned.
496 ///
497 /// For example, given a pattern of `**/*.foo`, only the tokens
498 /// corresponding to `*.foo` are returned.
499 ///
500 /// Note that this will return None if any match of the basename tokens
501 /// doesn't correspond to a match of the entire pattern. For example, the
502 /// glob `foo` only matches when a file path has a basename of `foo`, but
503 /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
504 /// `foo` doesn't match `abc/foo`.
basename_tokens(&self) -> Option<&[Token]>505 fn basename_tokens(&self) -> Option<&[Token]> {
506 if self.opts.case_insensitive {
507 return None;
508 }
509 let start = match self.tokens.get(0) {
510 Some(&Token::RecursivePrefix) => 1,
511 _ => {
512 // With nothing to gobble up the parent portion of a path,
513 // we can't assume that matching on only the basename is
514 // correct.
515 return None;
516 }
517 };
518 if self.tokens[start..].is_empty() {
519 return None;
520 }
521 for t in &self.tokens[start..] {
522 match *t {
523 Token::Literal('/') => return None,
524 Token::Literal(_) => {} // OK
525 Token::Any | Token::ZeroOrMore => {
526 if !self.opts.literal_separator {
527 // In this case, `*` and `?` can match a path
528 // separator, which means this could reach outside
529 // the basename.
530 return None;
531 }
532 }
533 Token::RecursivePrefix
534 | Token::RecursiveSuffix
535 | Token::RecursiveZeroOrMore => {
536 return None;
537 }
538 Token::Class { .. } | Token::Alternates(..) => {
539 // We *could* be a little smarter here, but either one
540 // of these is going to prevent our literal optimizations
541 // anyway, so give up.
542 return None;
543 }
544 }
545 }
546 Some(&self.tokens[start..])
547 }
548
549 /// Returns the pattern as a literal if and only if the pattern exclusively
550 /// matches the basename of a file path *and* is a literal.
551 ///
552 /// The basic format of these patterns is `**/{literal}`, where `{literal}`
553 /// does not contain a path separator.
basename_literal(&self) -> Option<String>554 fn basename_literal(&self) -> Option<String> {
555 let tokens = match self.basename_tokens() {
556 None => return None,
557 Some(tokens) => tokens,
558 };
559 let mut lit = String::new();
560 for t in tokens {
561 match *t {
562 Token::Literal(c) => lit.push(c),
563 _ => return None,
564 }
565 }
566 Some(lit)
567 }
568 }
569
570 impl<'a> GlobBuilder<'a> {
571 /// Create a new builder for the pattern given.
572 ///
573 /// The pattern is not compiled until `build` is called.
new(glob: &'a str) -> GlobBuilder<'a>574 pub fn new(glob: &'a str) -> GlobBuilder<'a> {
575 GlobBuilder { glob: glob, opts: GlobOptions::default() }
576 }
577
578 /// Parses and builds the pattern.
build(&self) -> Result<Glob, Error>579 pub fn build(&self) -> Result<Glob, Error> {
580 let mut p = Parser {
581 glob: &self.glob,
582 stack: vec![Tokens::default()],
583 chars: self.glob.chars().peekable(),
584 prev: None,
585 cur: None,
586 opts: &self.opts,
587 };
588 p.parse()?;
589 if p.stack.is_empty() {
590 Err(Error {
591 glob: Some(self.glob.to_string()),
592 kind: ErrorKind::UnopenedAlternates,
593 })
594 } else if p.stack.len() > 1 {
595 Err(Error {
596 glob: Some(self.glob.to_string()),
597 kind: ErrorKind::UnclosedAlternates,
598 })
599 } else {
600 let tokens = p.stack.pop().unwrap();
601 Ok(Glob {
602 glob: self.glob.to_string(),
603 re: tokens.to_regex_with(&self.opts),
604 opts: self.opts,
605 tokens: tokens,
606 })
607 }
608 }
609
610 /// Toggle whether the pattern matches case insensitively or not.
611 ///
612 /// This is disabled by default.
case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a>613 pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
614 self.opts.case_insensitive = yes;
615 self
616 }
617
618 /// Toggle whether a literal `/` is required to match a path separator.
619 ///
620 /// By default this is false: `*` and `?` will match `/`.
literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a>621 pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
622 self.opts.literal_separator = yes;
623 self
624 }
625
626 /// When enabled, a back slash (`\`) may be used to escape
627 /// special characters in a glob pattern. Additionally, this will
628 /// prevent `\` from being interpreted as a path separator on all
629 /// platforms.
630 ///
631 /// This is enabled by default on platforms where `\` is not a
632 /// path separator and disabled by default on platforms where `\`
633 /// is a path separator.
backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a>634 pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
635 self.opts.backslash_escape = yes;
636 self
637 }
638 }
639
640 impl Tokens {
641 /// Convert this pattern to a string that is guaranteed to be a valid
642 /// regular expression and will represent the matching semantics of this
643 /// glob pattern and the options given.
to_regex_with(&self, options: &GlobOptions) -> String644 fn to_regex_with(&self, options: &GlobOptions) -> String {
645 let mut re = String::new();
646 re.push_str("(?-u)");
647 if options.case_insensitive {
648 re.push_str("(?i)");
649 }
650 re.push('^');
651 // Special case. If the entire glob is just `**`, then it should match
652 // everything.
653 if self.len() == 1 && self[0] == Token::RecursivePrefix {
654 re.push_str(".*");
655 re.push('$');
656 return re;
657 }
658 self.tokens_to_regex(options, &self, &mut re);
659 re.push('$');
660 re
661 }
662
tokens_to_regex( &self, options: &GlobOptions, tokens: &[Token], re: &mut String, )663 fn tokens_to_regex(
664 &self,
665 options: &GlobOptions,
666 tokens: &[Token],
667 re: &mut String,
668 ) {
669 for tok in tokens {
670 match *tok {
671 Token::Literal(c) => {
672 re.push_str(&char_to_escaped_literal(c));
673 }
674 Token::Any => {
675 if options.literal_separator {
676 re.push_str("[^/]");
677 } else {
678 re.push_str(".");
679 }
680 }
681 Token::ZeroOrMore => {
682 if options.literal_separator {
683 re.push_str("[^/]*");
684 } else {
685 re.push_str(".*");
686 }
687 }
688 Token::RecursivePrefix => {
689 re.push_str("(?:/?|.*/)");
690 }
691 Token::RecursiveSuffix => {
692 re.push_str("/.*");
693 }
694 Token::RecursiveZeroOrMore => {
695 re.push_str("(?:/|/.*/)");
696 }
697 Token::Class { negated, ref ranges } => {
698 re.push('[');
699 if negated {
700 re.push('^');
701 }
702 for r in ranges {
703 if r.0 == r.1 {
704 // Not strictly necessary, but nicer to look at.
705 re.push_str(&char_to_escaped_literal(r.0));
706 } else {
707 re.push_str(&char_to_escaped_literal(r.0));
708 re.push('-');
709 re.push_str(&char_to_escaped_literal(r.1));
710 }
711 }
712 re.push(']');
713 }
714 Token::Alternates(ref patterns) => {
715 let mut parts = vec![];
716 for pat in patterns {
717 let mut altre = String::new();
718 self.tokens_to_regex(options, &pat, &mut altre);
719 if !altre.is_empty() {
720 parts.push(altre);
721 }
722 }
723
724 // It is possible to have an empty set in which case the
725 // resulting alternation '()' would be an error.
726 if !parts.is_empty() {
727 re.push('(');
728 re.push_str(&parts.join("|"));
729 re.push(')');
730 }
731 }
732 }
733 }
734 }
735 }
736
737 /// Convert a Unicode scalar value to an escaped string suitable for use as
738 /// a literal in a non-Unicode regex.
char_to_escaped_literal(c: char) -> String739 fn char_to_escaped_literal(c: char) -> String {
740 bytes_to_escaped_literal(&c.to_string().into_bytes())
741 }
742
743 /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
744 /// code units are converted to their escaped form.
bytes_to_escaped_literal(bs: &[u8]) -> String745 fn bytes_to_escaped_literal(bs: &[u8]) -> String {
746 let mut s = String::with_capacity(bs.len());
747 for &b in bs {
748 if b <= 0x7F {
749 s.push_str(®ex::escape(&(b as char).to_string()));
750 } else {
751 s.push_str(&format!("\\x{:02x}", b));
752 }
753 }
754 s
755 }
756
/// The state of the glob parser.
struct Parser<'a> {
    /// The pattern being parsed; copied into any error that is reported.
    glob: &'a str,
    /// The token lists under construction. The first entry holds the
    /// top-level tokens; opening an alternation group (`{`) and each `,`
    /// inside one push a further list, and `}` folds the extra lists back
    /// into a single `Token::Alternates`.
    stack: Vec<Tokens>,
    /// The characters of `glob` that have not been consumed yet.
    chars: iter::Peekable<str::Chars<'a>>,
    /// The character consumed before `cur`, if any.
    prev: Option<char>,
    /// The character most recently consumed by `bump`, if any.
    cur: Option<char>,
    /// The options the pattern is parsed with.
    opts: &'a GlobOptions,
}
765
766 impl<'a> Parser<'a> {
error(&self, kind: ErrorKind) -> Error767 fn error(&self, kind: ErrorKind) -> Error {
768 Error { glob: Some(self.glob.to_string()), kind: kind }
769 }
770
parse(&mut self) -> Result<(), Error>771 fn parse(&mut self) -> Result<(), Error> {
772 while let Some(c) = self.bump() {
773 match c {
774 '?' => self.push_token(Token::Any)?,
775 '*' => self.parse_star()?,
776 '[' => self.parse_class()?,
777 '{' => self.push_alternate()?,
778 '}' => self.pop_alternate()?,
779 ',' => self.parse_comma()?,
780 '\\' => self.parse_backslash()?,
781 c => self.push_token(Token::Literal(c))?,
782 }
783 }
784 Ok(())
785 }
786
push_alternate(&mut self) -> Result<(), Error>787 fn push_alternate(&mut self) -> Result<(), Error> {
788 if self.stack.len() > 1 {
789 return Err(self.error(ErrorKind::NestedAlternates));
790 }
791 Ok(self.stack.push(Tokens::default()))
792 }
793
pop_alternate(&mut self) -> Result<(), Error>794 fn pop_alternate(&mut self) -> Result<(), Error> {
795 let mut alts = vec![];
796 while self.stack.len() >= 2 {
797 alts.push(self.stack.pop().unwrap());
798 }
799 self.push_token(Token::Alternates(alts))
800 }
801
push_token(&mut self, tok: Token) -> Result<(), Error>802 fn push_token(&mut self, tok: Token) -> Result<(), Error> {
803 if let Some(ref mut pat) = self.stack.last_mut() {
804 return Ok(pat.push(tok));
805 }
806 Err(self.error(ErrorKind::UnopenedAlternates))
807 }
808
pop_token(&mut self) -> Result<Token, Error>809 fn pop_token(&mut self) -> Result<Token, Error> {
810 if let Some(ref mut pat) = self.stack.last_mut() {
811 return Ok(pat.pop().unwrap());
812 }
813 Err(self.error(ErrorKind::UnopenedAlternates))
814 }
815
have_tokens(&self) -> Result<bool, Error>816 fn have_tokens(&self) -> Result<bool, Error> {
817 match self.stack.last() {
818 None => Err(self.error(ErrorKind::UnopenedAlternates)),
819 Some(ref pat) => Ok(!pat.is_empty()),
820 }
821 }
822
parse_comma(&mut self) -> Result<(), Error>823 fn parse_comma(&mut self) -> Result<(), Error> {
824 // If we aren't inside a group alternation, then don't
825 // treat commas specially. Otherwise, we need to start
826 // a new alternate.
827 if self.stack.len() <= 1 {
828 self.push_token(Token::Literal(','))
829 } else {
830 Ok(self.stack.push(Tokens::default()))
831 }
832 }
833
parse_backslash(&mut self) -> Result<(), Error>834 fn parse_backslash(&mut self) -> Result<(), Error> {
835 if self.opts.backslash_escape {
836 match self.bump() {
837 None => Err(self.error(ErrorKind::DanglingEscape)),
838 Some(c) => self.push_token(Token::Literal(c)),
839 }
840 } else if is_separator('\\') {
841 // Normalize all patterns to use / as a separator.
842 self.push_token(Token::Literal('/'))
843 } else {
844 self.push_token(Token::Literal('\\'))
845 }
846 }
847
/// Handles a `*` that has just been consumed.
///
/// A lone `*` becomes `Token::ZeroOrMore`. A `**` becomes one of the
/// recursive tokens when it stands where a whole path component would
/// stand; in every other position it is pushed as two `Token::ZeroOrMore`
/// tokens.
fn parse_star(&mut self) -> Result<(), Error> {
    // The character that came before the `*` being handled.
    let prev = self.prev;
    if self.peek() != Some('*') {
        // A single `*`.
        self.push_token(Token::ZeroOrMore)?;
        return Ok(());
    }
    // Consume the second `*`.
    assert!(self.bump() == Some('*'));
    if !self.have_tokens()? {
        // The `**` is the first thing in the current token list.
        if !self.peek().map_or(true, is_separator) {
            // Followed by something other than a separator: two plain `*`.
            self.push_token(Token::ZeroOrMore)?;
            self.push_token(Token::ZeroOrMore)?;
        } else {
            // Followed by a separator or the end of the pattern. The
            // separator, if there is one, is consumed along with the `**`.
            self.push_token(Token::RecursivePrefix)?;
            assert!(self.bump().map_or(true, is_separator));
        }
        return Ok(());
    }

    if !prev.map(is_separator).unwrap_or(false) {
        // Not preceded by a separator. Treat the `**` as two plain `*`,
        // except when it directly follows a `,` or `{` while an alternation
        // group is open.
        if self.stack.len() <= 1
            || (prev != Some(',') && prev != Some('{'))
        {
            self.push_token(Token::ZeroOrMore)?;
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
    }
    let is_suffix = match self.peek() {
        // End of the pattern.
        None => {
            assert!(self.bump().is_none());
            true
        }
        // End of an alternative inside an alternation group.
        Some(',') | Some('}') if self.stack.len() >= 2 => true,
        // Followed by a separator, which is consumed here.
        Some(c) if is_separator(c) => {
            assert!(self.bump().map(is_separator).unwrap_or(false));
            false
        }
        // Followed by anything else: two plain `*`.
        _ => {
            self.push_token(Token::ZeroOrMore)?;
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
    };
    // Replace the token that precedes the `**`. An existing recursive
    // prefix or suffix is put back unchanged, so a repeated `**` collapses
    // into it; any other token is replaced by the recursive token for this
    // position.
    match self.pop_token()? {
        Token::RecursivePrefix => {
            self.push_token(Token::RecursivePrefix)?;
        }
        Token::RecursiveSuffix => {
            self.push_token(Token::RecursiveSuffix)?;
        }
        _ => {
            if is_suffix {
                self.push_token(Token::RecursiveSuffix)?;
            } else {
                self.push_token(Token::RecursiveZeroOrMore)?;
            }
        }
    }
    Ok(())
}
908
/// Parses a character class; the opening `[` has already been consumed.
///
/// A leading `!` or `^` negates the class. A `]` or `-` that is the first
/// character of the class (after any negation marker) is taken literally,
/// and so is a `-` that is the last character before the closing `]`.
///
/// Fails with `UnclosedClass` when the pattern ends before a closing `]`
/// is seen, and with `InvalidRange` when a range's end sorts before its
/// start.
fn parse_class(&mut self) -> Result<(), Error> {
    /// Sets the upper bound of the range `r` to `add`, failing when that
    /// leaves the range with its end before its start.
    fn add_to_last_range(
        glob: &str,
        r: &mut (char, char),
        add: char,
    ) -> Result<(), Error> {
        r.1 = add;
        if r.1 < r.0 {
            Err(Error {
                glob: Some(glob.to_string()),
                kind: ErrorKind::InvalidRange(r.0, r.1),
            })
        } else {
            Ok(())
        }
    }
    let mut ranges = vec![];
    let negated = match self.chars.peek() {
        Some(&'!') | Some(&'^') => {
            let bump = self.bump();
            assert!(bump == Some('!') || bump == Some('^'));
            true
        }
        _ => false,
    };
    // True only while looking at the first character of the class body.
    let mut first = true;
    // True right after a `-` that follows a class member, i.e. while the
    // upper bound of a range is still outstanding.
    let mut in_range = false;
    loop {
        let c = match self.bump() {
            Some(c) => c,
            // The only way to successfully break this loop is to observe
            // a ']'.
            None => return Err(self.error(ErrorKind::UnclosedClass)),
        };
        match c {
            ']' => {
                if first {
                    ranges.push((']', ']'));
                } else {
                    break;
                }
            }
            '-' => {
                if first {
                    ranges.push(('-', '-'));
                } else if in_range {
                    // invariant: in_range is only set when there is
                    // already at least one character seen.
                    let r = ranges.last_mut().unwrap();
                    add_to_last_range(&self.glob, r, '-')?;
                    in_range = false;
                } else {
                    assert!(!ranges.is_empty());
                    in_range = true;
                }
            }
            c => {
                if in_range {
                    // invariant: in_range is only set when there is
                    // already at least one character seen.
                    add_to_last_range(
                        &self.glob,
                        ranges.last_mut().unwrap(),
                        c,
                    )?;
                } else {
                    ranges.push((c, c));
                }
                in_range = false;
            }
        }
        first = false;
    }
    if in_range {
        // Means that the last character in the class was a '-', so add
        // it as a literal.
        ranges.push(('-', '-'));
    }
    self.push_token(Token::Class { negated: negated, ranges: ranges })
}
989
bump(&mut self) -> Option<char>990 fn bump(&mut self) -> Option<char> {
991 self.prev = self.cur;
992 self.cur = self.chars.next();
993 self.cur
994 }
995
peek(&mut self) -> Option<char>996 fn peek(&mut self) -> Option<char> {
997 self.chars.peek().map(|&ch| ch)
998 }
999 }
1000
/// Reports whether `haystack` begins with `needle`. An empty needle is a
/// prefix of every haystack.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1005
/// Reports whether `haystack` ends with `needle`. An empty needle is a
/// suffix of every haystack.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1013
1014 #[cfg(test)]
1015 mod tests {
1016 use super::Token::*;
1017 use super::{Glob, GlobBuilder, Token};
1018 use crate::{ErrorKind, GlobSetBuilder};
1019
1020 #[derive(Clone, Copy, Debug, Default)]
1021 struct Options {
1022 casei: Option<bool>,
1023 litsep: Option<bool>,
1024 bsesc: Option<bool>,
1025 }
1026
1027 macro_rules! syntax {
1028 ($name:ident, $pat:expr, $tokens:expr) => {
1029 #[test]
1030 fn $name() {
1031 let pat = Glob::new($pat).unwrap();
1032 assert_eq!($tokens, pat.tokens.0);
1033 }
1034 };
1035 }
1036
1037 macro_rules! syntaxerr {
1038 ($name:ident, $pat:expr, $err:expr) => {
1039 #[test]
1040 fn $name() {
1041 let err = Glob::new($pat).unwrap_err();
1042 assert_eq!(&$err, err.kind());
1043 }
1044 };
1045 }
1046
// Generates a `#[test]` function called `$name` which builds `$pat` and
// checks the regex it translates to.
//
// `$re` is the expected regex *without* the leading `(?-u)`; the macro adds
// that prefix before comparing against `regex()`. The three-argument form
// uses `Options::default()`, i.e. no builder setter is called.
macro_rules! toregex {
    ($name:ident, $pat:expr, $re:expr) => {
        toregex!($name, $pat, $re, Options::default());
    };
    ($name:ident, $pat:expr, $re:expr, $options:expr) => {
        #[test]
        fn $name() {
            // Callers may pass `Options` by value or by reference; field
            // access works for both.
            let opts = $options;
            let mut glob_builder = GlobBuilder::new($pat);
            if let Some(flag) = opts.casei {
                glob_builder.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                glob_builder.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                glob_builder.backslash_escape(flag);
            }
            let glob = glob_builder.build().unwrap();
            let expected = format!("(?-u){}", $re);
            assert_eq!(expected, glob.regex());
        }
    };
}
1069
// Generates a `#[test]` function called `$name` asserting that `$pat`
// matches `$path`.
//
// The pattern is checked three ways, in this order: through the matcher from
// `compile_matcher`, through the one from `compile_strategic_matcher`, and
// through a `GlobSet` containing only this pattern. The three-argument form
// uses `Options::default()`, i.e. no builder setter is called.
macro_rules! matches {
    ($name:ident, $pat:expr, $path:expr) => {
        matches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let opts = $options;
            let mut glob_builder = GlobBuilder::new($pat);
            if let Some(flag) = opts.casei {
                glob_builder.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                glob_builder.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                glob_builder.backslash_escape(flag);
            }
            let glob = glob_builder.build().unwrap();
            // Both matchers are compiled before the glob is moved into the set.
            let plain = glob.compile_matcher();
            let strategic = glob.compile_strategic_matcher();
            let glob_set = GlobSetBuilder::new().add(glob).build().unwrap();
            assert!(plain.is_match($path));
            assert!(strategic.is_match($path));
            assert!(glob_set.is_match($path));
        }
    };
}
1097
// Generates a `#[test]` function called `$name` asserting that `$pat` does
// NOT match `$path`.
//
// The pattern is checked three ways, in this order: through the matcher from
// `compile_matcher`, through the one from `compile_strategic_matcher`, and
// through a `GlobSet` containing only this pattern. The three-argument form
// uses `Options::default()`, i.e. no builder setter is called.
macro_rules! nmatches {
    ($name:ident, $pat:expr, $path:expr) => {
        nmatches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let opts = $options;
            let mut glob_builder = GlobBuilder::new($pat);
            if let Some(flag) = opts.casei {
                glob_builder.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                glob_builder.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                glob_builder.backslash_escape(flag);
            }
            let glob = glob_builder.build().unwrap();
            // Both matchers are compiled before the glob is moved into the set.
            let plain = glob.compile_matcher();
            let strategic = glob.compile_strategic_matcher();
            let glob_set = GlobSetBuilder::new().add(glob).build().unwrap();
            assert!(!plain.is_match($path));
            assert!(!strategic.is_match($path));
            assert!(!glob_set.is_match($path));
        }
    };
}
1125
/// Shorthand for turning a string slice into an owned `String`.
fn s(string: &str) -> String {
    String::from(string)
}
1129
class(s: char, e: char) -> Token1130 fn class(s: char, e: char) -> Token {
1131 Class { negated: false, ranges: vec![(s, e)] }
1132 }
1133
classn(s: char, e: char) -> Token1134 fn classn(s: char, e: char) -> Token {
1135 Class { negated: true, ranges: vec![(s, e)] }
1136 }
1137
rclass(ranges: &[(char, char)]) -> Token1138 fn rclass(ranges: &[(char, char)]) -> Token {
1139 Class { negated: false, ranges: ranges.to_vec() }
1140 }
1141
rclassn(ranges: &[(char, char)]) -> Token1142 fn rclassn(ranges: &[(char, char)]) -> Token {
1143 Class { negated: true, ranges: ranges.to_vec() }
1144 }
1145
// Parser output for literals, `?` and `*`.
syntax!(literal1, "a", vec![Literal('a')]);
syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
syntax!(any1, "?", vec![Any]);
syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
syntax!(seq1, "*", vec![ZeroOrMore]);
syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
syntax!(
    seq3,
    "*a*b*",
    vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
);
// Parser output for `**`: the token produced depends on whether the `**`
// sits at the start, at the end, or between two separators.
syntax!(rseq1, "**", vec![RecursivePrefix]);
syntax!(rseq2, "**/", vec![RecursivePrefix]);
syntax!(rseq3, "/**", vec![RecursiveSuffix]);
syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
syntax!(
    rseq5,
    "a/**/b",
    vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
);
// Parser output for character classes. Both `!` and `^` directly after `[`
// negate the class; a `]` directly after the opening (or after the
// negation marker) and a leading or trailing `-` are taken literally.
syntax!(cls1, "[a]", vec![class('a', 'a')]);
syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
syntax!(cls5, "[-]", vec![class('-', '-')]);
syntax!(cls6, "[]]", vec![class(']', ']')]);
syntax!(cls7, "[*]", vec![class('*', '*')]);
syntax!(cls8, "[!!]", vec![classn('!', '!')]);
syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
syntax!(
    cls12,
    "[-a-z-]",
    vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
);
syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
syntax!(cls14, "[--z]", vec![class('-', 'z')]);
syntax!(cls15, "[ --]", vec![class(' ', '-')]);
// Multiple ranges are kept in the order they were written.
syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1191
// Patterns the parser must reject: classes that are never closed (a `]`
// directly after `[` or `[!` does not close the class) ...
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
// ... and ranges whose start is greater than their end.
syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1198
// Canned option sets for the test macros. Each one sets exactly one builder
// flag and leaves the other two at the builder's default.

// Case-insensitive matching enabled.
const CASEI: Options =
    Options { casei: Some(true), litsep: None, bsesc: None };
// `literal_separator` enabled.
const SLASHLIT: Options =
    Options { casei: None, litsep: Some(true), bsesc: None };
// `backslash_escape` explicitly disabled.
const NOBSESC: Options =
    Options { casei: None, litsep: None, bsesc: Some(false) };
// `backslash_escape` explicitly enabled.
const BSESC: Options =
    Options { casei: None, litsep: None, bsesc: Some(true) };
1207
// Expected glob-to-regex translations. The `toregex!` macro prepends
// `(?-u)` to every expected string before comparing.

// Case-insensitive patterns get an `(?i)` flag.
toregex!(re_casei, "a", "(?i)^a$", &CASEI);

// With `literal_separator` enabled, wildcards exclude `/`.
toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);

// Default options: literals, wildcards, escaping of regex metacharacters
// and byte-wise encoding of non-ASCII characters.
toregex!(re1, "a", "^a$");
toregex!(re2, "?", "^.$");
toregex!(re3, "*", "^.*$");
toregex!(re4, "a?", "^a.$");
toregex!(re5, "?a", "^.a$");
toregex!(re6, "a*", "^a.*$");
toregex!(re7, "*a", "^.*a$");
toregex!(re8, "[*]", r"^[\*]$");
toregex!(re9, "[+]", r"^[\+]$");
toregex!(re10, "+", r"^\+$");
toregex!(re11, "☃", r"^\xe2\x98\x83$");
// Recursive wildcards: repeated `**/` components produce the same regex as
// a single one.
toregex!(re12, "**", r"^.*$");
toregex!(re13, "**/", r"^.*$");
toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
toregex!(re15, "**/**", r"^.*$");
toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
toregex!(re17, "**/**/**", r"^.*$");
toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
toregex!(re19, "a/**", r"^a/.*$");
toregex!(re20, "a/**/**", r"^a/.*$");
toregex!(re21, "a/**/**/**", r"^a/.*$");
toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
// A `**` that is not a whole path component is translated as consecutive
// plain `*` wildcards.
toregex!(re28, "a**", r"^a.*.*$");
toregex!(re29, "**a", r"^.*.*a$");
toregex!(re30, "a**b", r"^a.*.*b$");
toregex!(re31, "***", r"^.*.*.*$");
toregex!(re32, "/a**", r"^/a.*.*$");
toregex!(re33, "/**a", r"^/.*.*a$");
toregex!(re34, "/a**b", r"^/a.*.*b$");
1247
// Positive matches with default options: literals, `*`, classes and a
// non-ASCII literal.
matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b");
matches!(match3, "a*b*c", "abc");
matches!(match4, "a*b*c", "a_b_c");
matches!(match5, "a*b*c", "a___b___c");
matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
matches!(match9, "*.rs", ".rs");
matches!(match10, "☃", "☃");

// Positive matches for recursive wildcards (`**`), including zero
// intermediate directories and paths that start with a dot.
matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
matches!(matchrec5, "**", "abcde");
matches!(matchrec6, "**", "");
matches!(matchrec7, "**", ".asdf");
matches!(matchrec8, "**", "/x/.asdf");
matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
matches!(matchrec13, "**/test", "one/two/test");
matches!(matchrec14, "**/test", "one/test");
matches!(matchrec15, "**/test", "test");
matches!(matchrec16, "/**/test", "/one/two/test");
matches!(matchrec17, "/**/test", "/one/test");
matches!(matchrec18, "/**/test", "/test");
matches!(matchrec19, "**/.*", ".abc");
matches!(matchrec20, "**/.*", "abc/.abc");
matches!(matchrec21, "**/foo/bar", "foo/bar");
matches!(matchrec22, ".*/**", ".abc/abc");
matches!(matchrec23, "test/**", "test/");
matches!(matchrec24, "test/**", "test/one");
matches!(matchrec25, "test/**", "test/one/two");
matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1285
// Positive matches for character classes and ranges, including literal `-`
// at either end of a class and both negation markers.
matches!(matchrange1, "a[0-9]b", "a0b");
matches!(matchrange2, "a[0-9]b", "a9b");
matches!(matchrange3, "a[!0-9]b", "a_b");
matches!(matchrange4, "[a-z123]", "1");
matches!(matchrange5, "[1a-z23]", "1");
matches!(matchrange6, "[123a-z]", "1");
matches!(matchrange7, "[abc-]", "-");
matches!(matchrange8, "[-abc]", "-");
matches!(matchrange9, "[-a-c]", "b");
matches!(matchrange10, "[a-c-]", "b");
matches!(matchrange11, "[-]", "-");
matches!(matchrange12, "a[^0-9]b", "a_b");

// With default options a leading `*` also spans path separators.
matches!(matchpat1, "*hello.txt", "hello.txt");
matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
matches!(
    matchpat7,
    "*some/path/to/hello.txt",
    "a/bigger/some/path/to/hello.txt"
);

// Metacharacters placed inside a class match themselves.
matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");

// Case-insensitive matching accepts any casing of the path.
matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);

// Alternation with `{a,b}`. A comma outside braces is an ordinary
// character, and empty alternatives match the empty path.
matches!(matchalt1, "a,b", "a,b");
matches!(matchalt2, ",", ",");
matches!(matchalt3, "{a,b}", "a");
matches!(matchalt4, "{a,b}", "b");
matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
matches!(matchalt6, "{**/src/**,foo}", "foo");
matches!(matchalt7, "{[}],foo}", "}");
matches!(matchalt8, "{foo}", "foo");
matches!(matchalt9, "{}", "");
matches!(matchalt10, "{,}", "");
matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
1331
// Behaviour with `literal_separator` enabled (SLASHLIT): `?` and `*` must
// not match a path separator, while an explicit `/` (bare or inside a
// class) still does. Several cases are platform-specific and therefore
// gated on `cfg(unix)`.
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
#[cfg(unix)]
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
#[cfg(not(unix))]
nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
// NOTE(review): on non-Unix targets a backslash in the pattern is expected
// to match `/`, presumably because it is treated as a separator there;
// confirm against the parser.
#[cfg(unix)]
nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
#[cfg(not(unix))]
matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);

// Backslash handling. With `backslash_escape` enabled (BSESC) a backslash
// makes the following metacharacter literal; with it disabled (NOBSESC)
// the backslash is an ordinary character and the metacharacter keeps its
// meaning.
matches!(matchbackslash1, "\\[", "[", BSESC);
matches!(matchbackslash2, "\\?", "?", BSESC);
matches!(matchbackslash3, "\\*", "*", BSESC);
matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
// With default options the expected result differs by platform: `\a`
// matches `a` on Unix and `/a` elsewhere.
#[cfg(unix)]
matches!(matchbackslash7, "\\a", "a");
#[cfg(not(unix))]
matches!(matchbackslash8, "\\a", "/a");
1354
// Negative cases: each pattern must NOT match the given path under all
// three matching strategies exercised by `nmatches!`.
nmatches!(matchnot1, "a*b*c", "abcd");
nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
nmatches!(matchnot5, "/**/test", "test");
nmatches!(matchnot6, "/**/test", "/one/notthis");
nmatches!(matchnot7, "/**/test", "/notthis");
nmatches!(matchnot8, "**/.*", "ab.c");
nmatches!(matchnot9, "**/.*", "abc/ab.c");
nmatches!(matchnot10, ".*/**", "a.bc");
nmatches!(matchnot11, ".*/**", "abc/a.bc");
nmatches!(matchnot12, "a[0-9]b", "a_b");
nmatches!(matchnot13, "a[!0-9]b", "a0b");
nmatches!(matchnot14, "a[!0-9]b", "a9b");
nmatches!(matchnot15, "[!-]", "-");
// Patterns are anchored at both ends: trailing or leading extras fail.
nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
nmatches!(
    matchnot18,
    "*some/path/to/hello.txt",
    "some/path/to/hello.txt-and-then-some"
);
nmatches!(
    matchnot19,
    "*some/path/to/hello.txt",
    "some/other/path/to/hello.txt"
);
nmatches!(matchnot20, "a", "foo/a");
nmatches!(matchnot21, "./foo", "foo");
// `**/` must end on a component boundary.
nmatches!(matchnot22, "**/foo", "foofoo");
nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
nmatches!(
    matchnot26,
    "**/m4/ltoptions.m4",
    "csharp/src/packages/repositories.config",
    SLASHLIT
);
nmatches!(matchnot27, "a[^0-9]b", "a0b");
nmatches!(matchnot28, "a[^0-9]b", "a9b");
nmatches!(matchnot29, "[^-]", "-");
nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
// With `literal_separator` enabled a single `*` cannot span several
// directories.
nmatches!(
    matchrec31,
    "some/*/needle.txt",
    "some/one/two/needle.txt",
    SLASHLIT
);
nmatches!(
    matchrec32,
    "some/*/needle.txt",
    "some/one/two/three/needle.txt",
    SLASHLIT
);
// A recursive suffix needs something after the separator it follows.
nmatches!(matchrec33, ".*/**", ".abc");
nmatches!(matchrec34, "foo/**", "foo");
1412
// Generates a `#[test]` function called `$name` which builds `$pat` and
// checks that calling the method named `$which` on the built pattern
// returns `$expect`. The four-argument form uses `Options::default()`,
// i.e. no builder setter is called.
macro_rules! extract {
    ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
        extract!($which, $name, $pat, $expect, Options::default());
    };
    ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
        #[test]
        fn $name() {
            let opts = $options;
            let mut glob_builder = GlobBuilder::new($pat);
            if let Some(flag) = opts.casei {
                glob_builder.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                glob_builder.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                glob_builder.backslash_escape(flag);
            }
            let glob = glob_builder.build().unwrap();
            // `$expect` stays inline: some callers pass a borrow of a
            // temporary, which must live for the whole comparison.
            assert_eq!($expect, glob.$which());
        }
    };
}
1435
// Thin wrappers around `extract!`. Each fixes the method under test and
// forwards every remaining argument unchanged.

// Tests the `literal` method.
macro_rules! literal {
    ($($tt:tt)*) => { extract!(literal, $($tt)*); }
}

// Tests the `basename_tokens` method.
macro_rules! basetokens {
    ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
}

// Tests the `ext` method.
macro_rules! ext {
    ($($tt:tt)*) => { extract!(ext, $($tt)*); }
}

// Tests the `required_ext` method.
macro_rules! required_ext {
    ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
}

// Tests the `prefix` method.
macro_rules! prefix {
    ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
}

// Tests the `suffix` method.
macro_rules! suffix {
    ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
}

// Tests the `basename_literal` method.
macro_rules! baseliteral {
    ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
}
1463
// `literal`: expected to be `Some` only for patterns made purely of
// literal characters, and `None` when case-insensitive.
literal!(extract_lit1, "foo", Some(s("foo")));
literal!(extract_lit2, "foo", None, CASEI);
literal!(extract_lit3, "/foo", Some(s("/foo")));
literal!(extract_lit4, "/foo/", Some(s("/foo/")));
literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
literal!(extract_lit6, "*.foo", None);
literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
literal!(extract_lit8, "**/foo/bar", None);

// `basename_tokens`: the tokens following a leading `**/`. A `*` in the
// basename is only accepted when `literal_separator` is enabled.
basetokens!(
    extract_basetoks1,
    "**/foo",
    Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
);
basetokens!(extract_basetoks2, "**/foo", None, CASEI);
basetokens!(
    extract_basetoks3,
    "**/foo",
    Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
    SLASHLIT
);
basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
basetokens!(extract_basetoks5, "*foo", None);
basetokens!(extract_basetoks6, "**/fo*o", None);
basetokens!(
    extract_basetoks7,
    "**/fo*o",
    Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
    SLASHLIT
);

// `ext`: the extension (including the dot) for patterns of the form
// `*.ext` or `**/*.ext`.
ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
ext!(extract_ext2, "**/*.rs.bak", None);
ext!(extract_ext3, "*.rs", Some(s(".rs")));
ext!(extract_ext4, "a*.rs", None);
ext!(extract_ext5, "/*.c", None);
ext!(extract_ext6, "*.c", None, SLASHLIT);
ext!(extract_ext7, "*.c", Some(s(".c")));

// `required_ext`: an extension every matching path must have.
// NOTE(review): `extract_req_ext3` repeats the inputs of `extract_req_ext2`.
required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
required_ext!(extract_req_ext6, "./rs", None);
required_ext!(extract_req_ext7, "foo", None);
required_ext!(extract_req_ext8, ".foo/", None);
required_ext!(extract_req_ext9, "foo/", None);

// `prefix`: the literal prefix of the pattern.
prefix!(extract_prefix1, "/foo", Some(s("/foo")));
prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
prefix!(extract_prefix3, "**/foo", None);
prefix!(extract_prefix4, "foo/**", Some(s("foo/")));

// `suffix`: the literal suffix paired with a flag that is `true` only for
// the `**/`-prefixed case below.
suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));

// `basename_literal`: `Some` only for `**/` followed by a pure literal.
baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
baseliteral!(extract_baselit2, "foo", None);
baseliteral!(extract_baselit3, "*foo", None);
baseliteral!(extract_baselit4, "*/foo", None);
1530 }
1531