1 // matchers.rs
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7 
8 //! Structs and types for matching files and directories.
9 
10 use crate::{
11     dirstate::dirs_multiset::DirsChildrenMultiset,
12     filepatterns::{
13         build_single_regex, filter_subincludes, get_patterns_from_file,
14         PatternFileWarning, PatternResult,
15     },
16     utils::{
17         files::find_dirs,
18         hg_path::{HgPath, HgPathBuf},
19         Escaped,
20     },
21     DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22     PatternSyntax,
23 };
24 
25 use crate::filepatterns::normalize_path_bytes;
26 use std::borrow::ToOwned;
27 use std::collections::HashSet;
28 use std::fmt::{Display, Error, Formatter};
29 use std::iter::FromIterator;
30 use std::ops::Deref;
31 use std::path::{Path, PathBuf};
32 
33 use micro_timer::timed;
34 
/// Answer of a matcher to "which children of this directory should be
/// visited?", as returned by `Matcher::visit_children_set`.
#[derive(Debug, PartialEq)]
pub enum VisitChildrenSet<'a> {
    /// Don't visit anything
    Empty,
    /// Only visit this directory
    This,
    /// Visit this directory and these subdirectories
    /// TODO Should we implement a `NonEmptyHashSet`?
    Set(HashSet<&'a HgPath>),
    /// Visit this directory and all subdirectories
    Recursive,
}
47 
/// Common interface of the matchers: tells whether a file is matched and
/// which directories are worth visiting.
pub trait Matcher {
    /// Explicitly listed files
    fn file_set(&self) -> Option<&HashSet<&HgPath>>;
    /// Returns whether `filename` is in `file_set`
    fn exact_match(&self, filename: &HgPath) -> bool;
    /// Returns whether `filename` is matched by this matcher
    fn matches(&self, filename: &HgPath) -> bool;
    /// Decides whether a directory should be visited based on whether it
    /// has potential matches in it or one of its subdirectories, and
    /// potentially lists which subdirectories of that directory should be
    /// visited. This is based on the match's primary, included, and excluded
    /// patterns.
    ///
    /// # Example
    ///
    /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
    /// return the following values (assuming the implementation of
    /// visit_children_set is capable of recognizing this; some implementations
    /// are not).
    ///
    /// ```text
    /// '' -> {'foo', 'qux'}
    /// 'baz' -> set()
    /// 'foo' -> {'bar'}
    /// // Ideally this would be `Recursive`, but since the prefix nature of
    /// // matchers is applied to the entire matcher, we have to downgrade this
    /// // to `This` due to the (yet to be implemented in Rust) non-prefix
    /// // `RootFilesIn`-kind matcher being mixed in.
    /// 'foo/bar' -> 'this'
    /// 'qux' -> 'this'
    /// ```
    ///
    /// # Important
    ///
    /// Most matchers do not know if they're representing files or
    /// directories. They see `['path:dir/f']` and don't know whether `f` is a
    /// file or a directory, so `visit_children_set('dir')` for most matchers
    /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
    /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
    /// it may return `VisitChildrenSet::This`.
    /// Do not rely on the return being a `HashSet` indicating that there are
    /// no files in this dir to investigate (or equivalently that if there are
    /// files to investigate in 'dir' that it will always return
    /// `VisitChildrenSet::This`).
    fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
    /// Matcher will match everything and `file_set()` will be empty:
    /// optimization might be possible.
    fn matches_everything(&self) -> bool;
    /// Matcher will match exactly the files in `file_set()`: optimization
    /// might be possible.
    fn is_exact(&self) -> bool;
}
100 
/// Matches everything.
///
/// Patterns are never interpreted: any path, even one that looks like a
/// pattern, is matched.
///
/// ```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
114 
impl Matcher for AlwaysMatcher {
    /// This matcher has no explicitly listed files.
    fn file_set(&self) -> Option<&HashSet<&HgPath>> {
        None
    }
    /// Nothing is listed explicitly, so no file is ever an exact match.
    fn exact_match(&self, _filename: &HgPath) -> bool {
        false
    }
    /// Every file is matched, whatever its name.
    fn matches(&self, _filename: &HgPath) -> bool {
        true
    }
    /// Every directory is to be visited along with all its subdirectories.
    fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
        VisitChildrenSet::Recursive
    }
    /// Always `true`: this is the matcher that matches everything.
    fn matches_everything(&self) -> bool {
        true
    }
    /// Always `false`: matching is not driven by a set of files.
    fn is_exact(&self) -> bool {
        false
    }
}
135 
/// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
/// ```
/// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
///
/// let files = [HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
/// let matcher = FileMatcher::new(&files).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct FileMatcher<'a> {
    /// The files to match, borrowed from the slice given to `new`
    files: HashSet<&'a HgPath>,
    /// Multiset built from the same files by `DirsMultiset::from_manifest`;
    /// used to answer `visit_children_set`
    dirs: DirsMultiset,
}
155 
156 impl<'a> FileMatcher<'a> {
new(files: &'a [HgPathBuf]) -> Result<Self, DirstateMapError>157     pub fn new(files: &'a [HgPathBuf]) -> Result<Self, DirstateMapError> {
158         Ok(Self {
159             files: HashSet::from_iter(files.iter().map(AsRef::as_ref)),
160             dirs: DirsMultiset::from_manifest(files)?,
161         })
162     }
inner_matches(&self, filename: &HgPath) -> bool163     fn inner_matches(&self, filename: &HgPath) -> bool {
164         self.files.contains(filename.as_ref())
165     }
166 }
167 
168 impl<'a> Matcher for FileMatcher<'a> {
file_set(&self) -> Option<&HashSet<&HgPath>>169     fn file_set(&self) -> Option<&HashSet<&HgPath>> {
170         Some(&self.files)
171     }
exact_match(&self, filename: &HgPath) -> bool172     fn exact_match(&self, filename: &HgPath) -> bool {
173         self.inner_matches(filename)
174     }
matches(&self, filename: &HgPath) -> bool175     fn matches(&self, filename: &HgPath) -> bool {
176         self.inner_matches(filename)
177     }
visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet178     fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
179         if self.files.is_empty() || !self.dirs.contains(&directory) {
180             return VisitChildrenSet::Empty;
181         }
182         let dirs_as_set = self.dirs.iter().map(Deref::deref).collect();
183 
184         let mut candidates: HashSet<&HgPath> =
185             self.files.union(&dirs_as_set).cloned().collect();
186         candidates.remove(HgPath::new(b""));
187 
188         if !directory.as_ref().is_empty() {
189             let directory = [directory.as_ref().as_bytes(), b"/"].concat();
190             candidates = candidates
191                 .iter()
192                 .filter_map(|c| {
193                     if c.as_bytes().starts_with(&directory) {
194                         Some(HgPath::new(&c.as_bytes()[directory.len()..]))
195                     } else {
196                         None
197                     }
198                 })
199                 .collect();
200         }
201 
202         // `self.dirs` includes all of the directories, recursively, so if
203         // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
204         // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
205         // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
206         // subdir will be in there without a slash.
207         VisitChildrenSet::Set(
208             candidates
209                 .iter()
210                 .filter_map(|c| {
211                     if c.bytes().all(|b| *b != b'/') {
212                         Some(*c)
213                     } else {
214                         None
215                     }
216                 })
217                 .collect(),
218         )
219     }
matches_everything(&self) -> bool220     fn matches_everything(&self) -> bool {
221         false
222     }
is_exact(&self) -> bool223     fn is_exact(&self) -> bool {
224         true
225     }
226 }
227 
/// Matches files that are included in the ignore rules.
///
/// ```
/// use hg::{
///     matchers::{IncludeMatcher, Matcher},
///     IgnorePattern,
///     PatternSyntax,
///     utils::hg_path::HgPath
/// };
/// use std::path::Path;
///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
/// ```
pub struct IncludeMatcher<'a> {
    /// Bytes describing the patterns, kept for debugging and printing
    patterns: Vec<u8>,
    /// The function that actually decides whether a path is matched
    match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
    /// Whether all the patterns match a prefix (i.e. recursively)
    prefix: bool,
    /// Directories to match recursively
    roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    parents: HashSet<HgPathBuf>,
}
256 
257 impl<'a> Matcher for IncludeMatcher<'a> {
file_set(&self) -> Option<&HashSet<&HgPath>>258     fn file_set(&self) -> Option<&HashSet<&HgPath>> {
259         None
260     }
261 
exact_match(&self, _filename: &HgPath) -> bool262     fn exact_match(&self, _filename: &HgPath) -> bool {
263         false
264     }
265 
matches(&self, filename: &HgPath) -> bool266     fn matches(&self, filename: &HgPath) -> bool {
267         (self.match_fn)(filename.as_ref())
268     }
269 
visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet270     fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
271         let dir = directory.as_ref();
272         if self.prefix && self.roots.contains(dir) {
273             return VisitChildrenSet::Recursive;
274         }
275         if self.roots.contains(HgPath::new(b""))
276             || self.roots.contains(dir)
277             || self.dirs.contains(dir)
278             || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
279         {
280             return VisitChildrenSet::This;
281         }
282 
283         if self.parents.contains(directory.as_ref()) {
284             let multiset = self.get_all_parents_children();
285             if let Some(children) = multiset.get(dir) {
286                 return VisitChildrenSet::Set(children.to_owned());
287             }
288         }
289         VisitChildrenSet::Empty
290     }
291 
matches_everything(&self) -> bool292     fn matches_everything(&self) -> bool {
293         false
294     }
295 
is_exact(&self) -> bool296     fn is_exact(&self) -> bool {
297         false
298     }
299 }
300 
301 /// Returns a function that matches an `HgPath` against the given regex
302 /// pattern.
303 ///
304 /// This can fail when the pattern is invalid or not supported by the
305 /// underlying engine (the `regex` crate), for instance anything with
306 /// back-references.
307 #[timed]
re_matcher( pattern: &[u8], ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync>308 fn re_matcher(
309     pattern: &[u8],
310 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
311     use std::io::Write;
312 
313     // The `regex` crate adds `.*` to the start and end of expressions if there
314     // are no anchors, so add the start anchor.
315     let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
316     for byte in pattern {
317         if *byte > 127 {
318             write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
319         } else {
320             escaped_bytes.push(*byte);
321         }
322     }
323     escaped_bytes.push(b')');
324 
325     // Avoid the cost of UTF8 checking
326     //
327     // # Safety
328     // This is safe because we escaped all non-ASCII bytes.
329     let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
330     let re = regex::bytes::RegexBuilder::new(&pattern_string)
331         .unicode(false)
332         // Big repos with big `.hgignore` will hit the default limit and
333         // incur a significant performance hit. One repo's `hg status` hit
334         // multiple *minutes*.
335         .dfa_size_limit(50 * (1 << 20))
336         .build()
337         .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
338 
339     Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
340 }
341 
342 /// Returns the regex pattern and a function that matches an `HgPath` against
343 /// said regex formed by the given ignore patterns.
build_regex_match( ignore_patterns: &[IgnorePattern], ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)>344 fn build_regex_match(
345     ignore_patterns: &[IgnorePattern],
346 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
347     let mut regexps = vec![];
348     let mut exact_set = HashSet::new();
349 
350     for pattern in ignore_patterns {
351         if let Some(re) = build_single_regex(pattern)? {
352             regexps.push(re);
353         } else {
354             let exact = normalize_path_bytes(&pattern.pattern);
355             exact_set.insert(HgPathBuf::from_bytes(&exact));
356         }
357     }
358 
359     let full_regex = regexps.join(&b'|');
360 
361     // An empty pattern would cause the regex engine to incorrectly match the
362     // (empty) root directory
363     let func = if !(regexps.is_empty()) {
364         let matcher = re_matcher(&full_regex)?;
365         let func = move |filename: &HgPath| {
366             exact_set.contains(filename) || matcher(filename)
367         };
368         Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
369     } else {
370         let func = move |filename: &HgPath| exact_set.contains(filename);
371         Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
372     };
373 
374     Ok((full_regex, func))
375 }
376 
377 /// Returns roots and directories corresponding to each pattern.
378 ///
379 /// This calculates the roots and directories exactly matching the patterns and
380 /// returns a tuple of (roots, dirs). It does not return other directories
381 /// which may also need to be considered, like the parent directories.
roots_and_dirs( ignore_patterns: &[IgnorePattern], ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>)382 fn roots_and_dirs(
383     ignore_patterns: &[IgnorePattern],
384 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
385     let mut roots = Vec::new();
386     let mut dirs = Vec::new();
387 
388     for ignore_pattern in ignore_patterns {
389         let IgnorePattern {
390             syntax, pattern, ..
391         } = ignore_pattern;
392         match syntax {
393             PatternSyntax::RootGlob | PatternSyntax::Glob => {
394                 let mut root = HgPathBuf::new();
395                 for p in pattern.split(|c| *c == b'/') {
396                     if p.iter().any(|c| match *c {
397                         b'[' | b'{' | b'*' | b'?' => true,
398                         _ => false,
399                     }) {
400                         break;
401                     }
402                     root.push(HgPathBuf::from_bytes(p).as_ref());
403                 }
404                 roots.push(root);
405             }
406             PatternSyntax::Path | PatternSyntax::RelPath => {
407                 let pat = HgPath::new(if pattern == b"." {
408                     &[] as &[u8]
409                 } else {
410                     pattern
411                 });
412                 roots.push(pat.to_owned());
413             }
414             PatternSyntax::RootFiles => {
415                 let pat = if pattern == b"." {
416                     &[] as &[u8]
417                 } else {
418                     pattern
419                 };
420                 dirs.push(HgPathBuf::from_bytes(pat));
421             }
422             _ => {
423                 roots.push(HgPathBuf::new());
424             }
425         }
426     }
427     (roots, dirs)
428 }
429 
/// Paths extracted from patterns, as computed by `roots_dirs_and_parents`
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
    /// Directories to match recursively
    pub roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    pub dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    pub parents: HashSet<HgPathBuf>,
}
440 
441 /// Extract roots, dirs and parents from patterns.
roots_dirs_and_parents( ignore_patterns: &[IgnorePattern], ) -> PatternResult<RootsDirsAndParents>442 fn roots_dirs_and_parents(
443     ignore_patterns: &[IgnorePattern],
444 ) -> PatternResult<RootsDirsAndParents> {
445     let (roots, dirs) = roots_and_dirs(ignore_patterns);
446 
447     let mut parents = HashSet::new();
448 
449     parents.extend(
450         DirsMultiset::from_manifest(&dirs)
451             .map_err(|e| match e {
452                 DirstateMapError::InvalidPath(e) => e,
453                 _ => unreachable!(),
454             })?
455             .iter()
456             .map(ToOwned::to_owned),
457     );
458     parents.extend(
459         DirsMultiset::from_manifest(&roots)
460             .map_err(|e| match e {
461                 DirstateMapError::InvalidPath(e) => e,
462                 _ => unreachable!(),
463             })?
464             .iter()
465             .map(ToOwned::to_owned),
466     );
467 
468     Ok(RootsDirsAndParents {
469         roots: HashSet::from_iter(roots),
470         dirs: HashSet::from_iter(dirs),
471         parents,
472     })
473 }
474 
475 /// Returns a function that checks whether a given file (in the general sense)
476 /// should be matched.
build_match<'a, 'b>( ignore_patterns: Vec<IgnorePattern>, ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)>477 fn build_match<'a, 'b>(
478     ignore_patterns: Vec<IgnorePattern>,
479 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
480     let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
481     // For debugging and printing
482     let mut patterns = vec![];
483 
484     let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
485 
486     if !subincludes.is_empty() {
487         // Build prefix-based matcher functions for subincludes
488         let mut submatchers = FastHashMap::default();
489         let mut prefixes = vec![];
490 
491         for sub_include in subincludes {
492             let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
493             let match_fn =
494                 Box::new(move |path: &HgPath| matcher.matches(path));
495             prefixes.push(sub_include.prefix.clone());
496             submatchers.insert(sub_include.prefix.clone(), match_fn);
497         }
498 
499         let match_subinclude = move |filename: &HgPath| {
500             for prefix in prefixes.iter() {
501                 if let Some(rel) = filename.relative_to(prefix) {
502                     if (submatchers[prefix])(rel) {
503                         return true;
504                     }
505                 }
506             }
507             false
508         };
509 
510         match_funcs.push(Box::new(match_subinclude));
511     }
512 
513     if !ignore_patterns.is_empty() {
514         // Either do dumb matching if all patterns are rootfiles, or match
515         // with a regex.
516         if ignore_patterns
517             .iter()
518             .all(|k| k.syntax == PatternSyntax::RootFiles)
519         {
520             let dirs: HashSet<_> = ignore_patterns
521                 .iter()
522                 .map(|k| k.pattern.to_owned())
523                 .collect();
524             let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
525 
526             let match_func = move |path: &HgPath| -> bool {
527                 let path = path.as_bytes();
528                 let i = path.iter().rfind(|a| **a == b'/');
529                 let dir = if let Some(i) = i {
530                     &path[..*i as usize]
531                 } else {
532                     b"."
533                 };
534                 dirs.contains(dir.deref())
535             };
536             match_funcs.push(Box::new(match_func));
537 
538             patterns.extend(b"rootfilesin: ");
539             dirs_vec.sort();
540             patterns.extend(dirs_vec.escaped_bytes());
541         } else {
542             let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
543             patterns = new_re;
544             match_funcs.push(match_func)
545         }
546     }
547 
548     Ok(if match_funcs.len() == 1 {
549         (patterns, match_funcs.remove(0))
550     } else {
551         (
552             patterns,
553             Box::new(move |f: &HgPath| -> bool {
554                 match_funcs.iter().any(|match_func| match_func(f))
555             }),
556         )
557     })
558 }
559 
560 /// Parses all "ignore" files with their recursive includes and returns a
561 /// function that checks whether a given file (in the general sense) should be
562 /// ignored.
get_ignore_function<'a>( mut all_pattern_files: Vec<PathBuf>, root_dir: &Path, inspect_pattern_bytes: &mut impl FnMut(&[u8]), ) -> PatternResult<( Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>, Vec<PatternFileWarning>, )>563 pub fn get_ignore_function<'a>(
564     mut all_pattern_files: Vec<PathBuf>,
565     root_dir: &Path,
566     inspect_pattern_bytes: &mut impl FnMut(&[u8]),
567 ) -> PatternResult<(
568     Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
569     Vec<PatternFileWarning>,
570 )> {
571     let mut all_patterns = vec![];
572     let mut all_warnings = vec![];
573 
574     // Sort to make the ordering of calls to `inspect_pattern_bytes`
575     // deterministic even if the ordering of `all_pattern_files` is not (such
576     // as when a iteration order of a Python dict or Rust HashMap is involved).
577     // Sort by "string" representation instead of the default by component
578     // (with a Rust-specific definition of a component)
579     all_pattern_files
580         .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
581 
582     for pattern_file in &all_pattern_files {
583         let (patterns, warnings) = get_patterns_from_file(
584             pattern_file,
585             root_dir,
586             inspect_pattern_bytes,
587         )?;
588 
589         all_patterns.extend(patterns.to_owned());
590         all_warnings.extend(warnings);
591     }
592     let matcher = IncludeMatcher::new(all_patterns)?;
593     Ok((
594         Box::new(move |path: &HgPath| matcher.matches(path)),
595         all_warnings,
596     ))
597 }
598 
599 impl<'a> IncludeMatcher<'a> {
new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self>600     pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
601         let RootsDirsAndParents {
602             roots,
603             dirs,
604             parents,
605         } = roots_dirs_and_parents(&ignore_patterns)?;
606         let prefix = ignore_patterns.iter().any(|k| match k.syntax {
607             PatternSyntax::Path | PatternSyntax::RelPath => true,
608             _ => false,
609         });
610         let (patterns, match_fn) = build_match(ignore_patterns)?;
611 
612         Ok(Self {
613             patterns,
614             match_fn,
615             prefix,
616             roots,
617             dirs,
618             parents,
619         })
620     }
621 
get_all_parents_children(&self) -> DirsChildrenMultiset622     fn get_all_parents_children(&self) -> DirsChildrenMultiset {
623         // TODO cache
624         let thing = self
625             .dirs
626             .iter()
627             .chain(self.roots.iter())
628             .chain(self.parents.iter());
629         DirsChildrenMultiset::new(thing, Some(&self.parents))
630     }
631 }
632 
633 impl<'a> Display for IncludeMatcher<'a> {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>634     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
635         // XXX What about exact matches?
636         // I'm not sure it's worth it to clone the HashSet and keep it
637         // around just in case someone wants to display the matcher, plus
638         // it's going to be unreadable after a few entries, but we need to
639         // inform in this display that exact matches are being used and are
640         // (on purpose) missing from the `includes`.
641         write!(
642             f,
643             "IncludeMatcher(includes='{}')",
644             String::from_utf8_lossy(&self.patterns.escaped_bytes())
645         )
646     }
647 }
648 
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Builds the `VisitChildrenSet::Set` holding the given child names.
    fn children<'a>(names: &[&'a [u8]]) -> VisitChildrenSet<'a> {
        VisitChildrenSet::Set(
            names.iter().map(|name| HgPath::new(*name)).collect(),
        )
    }

    /// Glob patterns shared by the roots/dirs/parents tests.
    fn glob_patterns() -> Vec<IgnorePattern> {
        vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ]
    }

    #[test]
    fn test_roots_and_dirs() {
        let (roots, dirs) = roots_and_dirs(&glob_patterns());

        let expected_roots = vec![
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ];
        assert_eq!(roots, expected_roots);
        assert_eq!(dirs, Vec::<HgPathBuf>::new());
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let roots: HashSet<_> =
            vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
                .into_iter()
                .collect();
        let parents: HashSet<_> =
            vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
                .into_iter()
                .collect();
        let expected = RootsDirsAndParents {
            roots,
            dirs: HashSet::new(),
            parents,
        };

        assert_eq!(roots_dirs_and_parents(&glob_patterns()).unwrap(), expected);
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        // Each level only advertises the next component of the path
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            children(&[b"dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            children(&[b"subdir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            children(&[b"foo.txt"])
        );

        // Anything else is not to be visited at all
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPathBuf::from_bytes(b"rootfile.txt"),
            HgPathBuf::from_bytes(b"a/file1.txt"),
            HgPathBuf::from_bytes(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            children(&[b"a", b"rootfile.txt"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a")),
            children(&[b"b", b"file1.txt"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b")),
            children(&[b"c", b"file2.txt"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c")),
            children(&[b"d"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
            children(&[b"file4.txt"])
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            children(&[b"dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            children(&[b"subdir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RootFiles,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            children(&[b"dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            children(&[b"subdir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::Glob,
            b"dir/z*",
            Path::new(""),
        )])
        .unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            children(&[b"dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
907