1 // matchers.rs
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 //! Structs and types for matching files and directories.
9
10 use crate::{
11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 filepatterns::{
13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 PatternFileWarning, PatternResult,
15 },
16 utils::{
17 files::find_dirs,
18 hg_path::{HgPath, HgPathBuf},
19 Escaped,
20 },
21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22 PatternSyntax,
23 };
24
25 use crate::filepatterns::normalize_path_bytes;
26 use std::borrow::ToOwned;
27 use std::collections::HashSet;
28 use std::fmt::{Display, Error, Formatter};
29 use std::iter::FromIterator;
30 use std::ops::Deref;
31 use std::path::{Path, PathBuf};
32
33 use micro_timer::timed;
34
/// Describes which children of a directory should be visited, as returned by
/// `Matcher::visit_children_set`.
#[derive(Debug, PartialEq)]
pub enum VisitChildrenSet<'a> {
    /// Don't visit anything
    Empty,
    /// Only visit this directory
    This,
    /// Visit this directory and these subdirectories
    /// TODO Should we implement a `NonEmptyHashSet`?
    Set(HashSet<&'a HgPath>),
    /// Visit this directory and all subdirectories
    Recursive,
}
47
/// Common interface implemented by every matcher in this module.
pub trait Matcher {
    /// Explicitly listed files
    fn file_set(&self) -> Option<&HashSet<&HgPath>>;
    /// Returns whether `filename` is in `file_set`
    fn exact_match(&self, filename: &HgPath) -> bool;
    /// Returns whether `filename` is matched by this matcher
    fn matches(&self, filename: &HgPath) -> bool;
    /// Decides whether a directory should be visited based on whether it
    /// has potential matches in it or one of its subdirectories, and
    /// potentially lists which subdirectories of that directory should be
    /// visited. This is based on the match's primary, included, and excluded
    /// patterns.
    ///
    /// # Example
    ///
    /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
    /// return the following values (assuming the implementation of
    /// visit_children_set is capable of recognizing this; some implementations
    /// are not).
    ///
    /// ```text
    /// '' -> {'foo', 'qux'}
    /// 'baz' -> set()
    /// 'foo' -> {'bar'}
    /// // Ideally this would be `Recursive`, but since the prefix nature of
    /// // matchers is applied to the entire matcher, we have to downgrade this
    /// // to `This` due to the (yet to be implemented in Rust) non-prefix
    /// // `RootFilesIn`-kind matcher being mixed in.
    /// 'foo/bar' -> 'this'
    /// 'qux' -> 'this'
    /// ```
    ///
    /// # Important
    ///
    /// Most matchers do not know if they're representing files or
    /// directories. They see `['path:dir/f']` and don't know whether `f` is a
    /// file or a directory, so `visit_children_set('dir')` for most matchers
    /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
    /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
    /// it may return `VisitChildrenSet::This`.
    /// Do not rely on the return being a `HashSet` indicating that there are
    /// no files in this dir to investigate (or equivalently that if there are
    /// files to investigate in 'dir' that it will always return
    /// `VisitChildrenSet::This`).
    fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
    /// Matcher will match everything and `file_set()` will be empty:
    /// optimization might be possible.
    fn matches_everything(&self) -> bool;
    /// Matcher will match exactly the files in `file_set()`: optimization
    /// might be possible.
    fn is_exact(&self) -> bool;
}
100
/// Matches everything.
///
/// ```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
114
impl Matcher for AlwaysMatcher {
    /// There is no explicit file list for this matcher.
    fn file_set(&self) -> Option<&HashSet<&HgPath>> {
        None
    }
    /// Always `false`: no file is explicitly listed.
    fn exact_match(&self, _filename: &HgPath) -> bool {
        false
    }
    /// Always `true`, whatever the path.
    fn matches(&self, _filename: &HgPath) -> bool {
        true
    }
    /// Every directory is visited along with all of its subdirectories.
    fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
        VisitChildrenSet::Recursive
    }
    /// Always `true`.
    fn matches_everything(&self) -> bool {
        true
    }
    /// Always `false`.
    fn is_exact(&self) -> bool {
        false
    }
}
135
/// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
/// ```
/// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
///
/// let files = [HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
/// let matcher = FileMatcher::new(&files).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct FileMatcher<'a> {
    /// The explicitly listed files, borrowed from the caller's slice
    files: HashSet<&'a HgPath>,
    /// Multiset built from `files` with `DirsMultiset::from_manifest`; it
    /// holds the directories leading to each listed file
    dirs: DirsMultiset,
}
155
156 impl<'a> FileMatcher<'a> {
new(files: &'a [HgPathBuf]) -> Result<Self, DirstateMapError>157 pub fn new(files: &'a [HgPathBuf]) -> Result<Self, DirstateMapError> {
158 Ok(Self {
159 files: HashSet::from_iter(files.iter().map(AsRef::as_ref)),
160 dirs: DirsMultiset::from_manifest(files)?,
161 })
162 }
inner_matches(&self, filename: &HgPath) -> bool163 fn inner_matches(&self, filename: &HgPath) -> bool {
164 self.files.contains(filename.as_ref())
165 }
166 }
167
168 impl<'a> Matcher for FileMatcher<'a> {
file_set(&self) -> Option<&HashSet<&HgPath>>169 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
170 Some(&self.files)
171 }
exact_match(&self, filename: &HgPath) -> bool172 fn exact_match(&self, filename: &HgPath) -> bool {
173 self.inner_matches(filename)
174 }
matches(&self, filename: &HgPath) -> bool175 fn matches(&self, filename: &HgPath) -> bool {
176 self.inner_matches(filename)
177 }
visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet178 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
179 if self.files.is_empty() || !self.dirs.contains(&directory) {
180 return VisitChildrenSet::Empty;
181 }
182 let dirs_as_set = self.dirs.iter().map(Deref::deref).collect();
183
184 let mut candidates: HashSet<&HgPath> =
185 self.files.union(&dirs_as_set).cloned().collect();
186 candidates.remove(HgPath::new(b""));
187
188 if !directory.as_ref().is_empty() {
189 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
190 candidates = candidates
191 .iter()
192 .filter_map(|c| {
193 if c.as_bytes().starts_with(&directory) {
194 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
195 } else {
196 None
197 }
198 })
199 .collect();
200 }
201
202 // `self.dirs` includes all of the directories, recursively, so if
203 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
204 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
205 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
206 // subdir will be in there without a slash.
207 VisitChildrenSet::Set(
208 candidates
209 .iter()
210 .filter_map(|c| {
211 if c.bytes().all(|b| *b != b'/') {
212 Some(*c)
213 } else {
214 None
215 }
216 })
217 .collect(),
218 )
219 }
matches_everything(&self) -> bool220 fn matches_everything(&self) -> bool {
221 false
222 }
is_exact(&self) -> bool223 fn is_exact(&self) -> bool {
224 true
225 }
226 }
227
/// Matches files that are included in the ignore rules.
///
/// ```
/// use hg::{
///     matchers::{IncludeMatcher, Matcher},
///     IgnorePattern,
///     PatternSyntax,
///     utils::hg_path::HgPath
/// };
/// use std::path::Path;
///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
/// ```
pub struct IncludeMatcher<'a> {
    /// Byte representation of the patterns, used when displaying the matcher
    patterns: Vec<u8>,
    /// Function deciding whether a given path is matched
    match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
    /// Whether all the patterns match a prefix (i.e. recursively)
    prefix: bool,
    /// Directories to match recursively
    roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    parents: HashSet<HgPathBuf>,
}
256
257 impl<'a> Matcher for IncludeMatcher<'a> {
file_set(&self) -> Option<&HashSet<&HgPath>>258 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
259 None
260 }
261
exact_match(&self, _filename: &HgPath) -> bool262 fn exact_match(&self, _filename: &HgPath) -> bool {
263 false
264 }
265
matches(&self, filename: &HgPath) -> bool266 fn matches(&self, filename: &HgPath) -> bool {
267 (self.match_fn)(filename.as_ref())
268 }
269
visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet270 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
271 let dir = directory.as_ref();
272 if self.prefix && self.roots.contains(dir) {
273 return VisitChildrenSet::Recursive;
274 }
275 if self.roots.contains(HgPath::new(b""))
276 || self.roots.contains(dir)
277 || self.dirs.contains(dir)
278 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
279 {
280 return VisitChildrenSet::This;
281 }
282
283 if self.parents.contains(directory.as_ref()) {
284 let multiset = self.get_all_parents_children();
285 if let Some(children) = multiset.get(dir) {
286 return VisitChildrenSet::Set(children.to_owned());
287 }
288 }
289 VisitChildrenSet::Empty
290 }
291
matches_everything(&self) -> bool292 fn matches_everything(&self) -> bool {
293 false
294 }
295
is_exact(&self) -> bool296 fn is_exact(&self) -> bool {
297 false
298 }
299 }
300
301 /// Returns a function that matches an `HgPath` against the given regex
302 /// pattern.
303 ///
304 /// This can fail when the pattern is invalid or not supported by the
305 /// underlying engine (the `regex` crate), for instance anything with
306 /// back-references.
307 #[timed]
re_matcher( pattern: &[u8], ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync>308 fn re_matcher(
309 pattern: &[u8],
310 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
311 use std::io::Write;
312
313 // The `regex` crate adds `.*` to the start and end of expressions if there
314 // are no anchors, so add the start anchor.
315 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
316 for byte in pattern {
317 if *byte > 127 {
318 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
319 } else {
320 escaped_bytes.push(*byte);
321 }
322 }
323 escaped_bytes.push(b')');
324
325 // Avoid the cost of UTF8 checking
326 //
327 // # Safety
328 // This is safe because we escaped all non-ASCII bytes.
329 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
330 let re = regex::bytes::RegexBuilder::new(&pattern_string)
331 .unicode(false)
332 // Big repos with big `.hgignore` will hit the default limit and
333 // incur a significant performance hit. One repo's `hg status` hit
334 // multiple *minutes*.
335 .dfa_size_limit(50 * (1 << 20))
336 .build()
337 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
338
339 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
340 }
341
342 /// Returns the regex pattern and a function that matches an `HgPath` against
343 /// said regex formed by the given ignore patterns.
build_regex_match( ignore_patterns: &[IgnorePattern], ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)>344 fn build_regex_match(
345 ignore_patterns: &[IgnorePattern],
346 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
347 let mut regexps = vec![];
348 let mut exact_set = HashSet::new();
349
350 for pattern in ignore_patterns {
351 if let Some(re) = build_single_regex(pattern)? {
352 regexps.push(re);
353 } else {
354 let exact = normalize_path_bytes(&pattern.pattern);
355 exact_set.insert(HgPathBuf::from_bytes(&exact));
356 }
357 }
358
359 let full_regex = regexps.join(&b'|');
360
361 // An empty pattern would cause the regex engine to incorrectly match the
362 // (empty) root directory
363 let func = if !(regexps.is_empty()) {
364 let matcher = re_matcher(&full_regex)?;
365 let func = move |filename: &HgPath| {
366 exact_set.contains(filename) || matcher(filename)
367 };
368 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
369 } else {
370 let func = move |filename: &HgPath| exact_set.contains(filename);
371 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
372 };
373
374 Ok((full_regex, func))
375 }
376
377 /// Returns roots and directories corresponding to each pattern.
378 ///
379 /// This calculates the roots and directories exactly matching the patterns and
380 /// returns a tuple of (roots, dirs). It does not return other directories
381 /// which may also need to be considered, like the parent directories.
roots_and_dirs( ignore_patterns: &[IgnorePattern], ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>)382 fn roots_and_dirs(
383 ignore_patterns: &[IgnorePattern],
384 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
385 let mut roots = Vec::new();
386 let mut dirs = Vec::new();
387
388 for ignore_pattern in ignore_patterns {
389 let IgnorePattern {
390 syntax, pattern, ..
391 } = ignore_pattern;
392 match syntax {
393 PatternSyntax::RootGlob | PatternSyntax::Glob => {
394 let mut root = HgPathBuf::new();
395 for p in pattern.split(|c| *c == b'/') {
396 if p.iter().any(|c| match *c {
397 b'[' | b'{' | b'*' | b'?' => true,
398 _ => false,
399 }) {
400 break;
401 }
402 root.push(HgPathBuf::from_bytes(p).as_ref());
403 }
404 roots.push(root);
405 }
406 PatternSyntax::Path | PatternSyntax::RelPath => {
407 let pat = HgPath::new(if pattern == b"." {
408 &[] as &[u8]
409 } else {
410 pattern
411 });
412 roots.push(pat.to_owned());
413 }
414 PatternSyntax::RootFiles => {
415 let pat = if pattern == b"." {
416 &[] as &[u8]
417 } else {
418 pattern
419 };
420 dirs.push(HgPathBuf::from_bytes(pat));
421 }
422 _ => {
423 roots.push(HgPathBuf::new());
424 }
425 }
426 }
427 (roots, dirs)
428 }
429
/// Paths extracted from patterns, as computed by `roots_dirs_and_parents`
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
    /// Directories to match recursively
    pub roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    pub dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    pub parents: HashSet<HgPathBuf>,
}
440
441 /// Extract roots, dirs and parents from patterns.
roots_dirs_and_parents( ignore_patterns: &[IgnorePattern], ) -> PatternResult<RootsDirsAndParents>442 fn roots_dirs_and_parents(
443 ignore_patterns: &[IgnorePattern],
444 ) -> PatternResult<RootsDirsAndParents> {
445 let (roots, dirs) = roots_and_dirs(ignore_patterns);
446
447 let mut parents = HashSet::new();
448
449 parents.extend(
450 DirsMultiset::from_manifest(&dirs)
451 .map_err(|e| match e {
452 DirstateMapError::InvalidPath(e) => e,
453 _ => unreachable!(),
454 })?
455 .iter()
456 .map(ToOwned::to_owned),
457 );
458 parents.extend(
459 DirsMultiset::from_manifest(&roots)
460 .map_err(|e| match e {
461 DirstateMapError::InvalidPath(e) => e,
462 _ => unreachable!(),
463 })?
464 .iter()
465 .map(ToOwned::to_owned),
466 );
467
468 Ok(RootsDirsAndParents {
469 roots: HashSet::from_iter(roots),
470 dirs: HashSet::from_iter(dirs),
471 parents,
472 })
473 }
474
475 /// Returns a function that checks whether a given file (in the general sense)
476 /// should be matched.
build_match<'a, 'b>( ignore_patterns: Vec<IgnorePattern>, ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)>477 fn build_match<'a, 'b>(
478 ignore_patterns: Vec<IgnorePattern>,
479 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
480 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
481 // For debugging and printing
482 let mut patterns = vec![];
483
484 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
485
486 if !subincludes.is_empty() {
487 // Build prefix-based matcher functions for subincludes
488 let mut submatchers = FastHashMap::default();
489 let mut prefixes = vec![];
490
491 for sub_include in subincludes {
492 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
493 let match_fn =
494 Box::new(move |path: &HgPath| matcher.matches(path));
495 prefixes.push(sub_include.prefix.clone());
496 submatchers.insert(sub_include.prefix.clone(), match_fn);
497 }
498
499 let match_subinclude = move |filename: &HgPath| {
500 for prefix in prefixes.iter() {
501 if let Some(rel) = filename.relative_to(prefix) {
502 if (submatchers[prefix])(rel) {
503 return true;
504 }
505 }
506 }
507 false
508 };
509
510 match_funcs.push(Box::new(match_subinclude));
511 }
512
513 if !ignore_patterns.is_empty() {
514 // Either do dumb matching if all patterns are rootfiles, or match
515 // with a regex.
516 if ignore_patterns
517 .iter()
518 .all(|k| k.syntax == PatternSyntax::RootFiles)
519 {
520 let dirs: HashSet<_> = ignore_patterns
521 .iter()
522 .map(|k| k.pattern.to_owned())
523 .collect();
524 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
525
526 let match_func = move |path: &HgPath| -> bool {
527 let path = path.as_bytes();
528 let i = path.iter().rfind(|a| **a == b'/');
529 let dir = if let Some(i) = i {
530 &path[..*i as usize]
531 } else {
532 b"."
533 };
534 dirs.contains(dir.deref())
535 };
536 match_funcs.push(Box::new(match_func));
537
538 patterns.extend(b"rootfilesin: ");
539 dirs_vec.sort();
540 patterns.extend(dirs_vec.escaped_bytes());
541 } else {
542 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
543 patterns = new_re;
544 match_funcs.push(match_func)
545 }
546 }
547
548 Ok(if match_funcs.len() == 1 {
549 (patterns, match_funcs.remove(0))
550 } else {
551 (
552 patterns,
553 Box::new(move |f: &HgPath| -> bool {
554 match_funcs.iter().any(|match_func| match_func(f))
555 }),
556 )
557 })
558 }
559
560 /// Parses all "ignore" files with their recursive includes and returns a
561 /// function that checks whether a given file (in the general sense) should be
562 /// ignored.
get_ignore_function<'a>( mut all_pattern_files: Vec<PathBuf>, root_dir: &Path, inspect_pattern_bytes: &mut impl FnMut(&[u8]), ) -> PatternResult<( Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>, Vec<PatternFileWarning>, )>563 pub fn get_ignore_function<'a>(
564 mut all_pattern_files: Vec<PathBuf>,
565 root_dir: &Path,
566 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
567 ) -> PatternResult<(
568 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
569 Vec<PatternFileWarning>,
570 )> {
571 let mut all_patterns = vec![];
572 let mut all_warnings = vec![];
573
574 // Sort to make the ordering of calls to `inspect_pattern_bytes`
575 // deterministic even if the ordering of `all_pattern_files` is not (such
576 // as when a iteration order of a Python dict or Rust HashMap is involved).
577 // Sort by "string" representation instead of the default by component
578 // (with a Rust-specific definition of a component)
579 all_pattern_files
580 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
581
582 for pattern_file in &all_pattern_files {
583 let (patterns, warnings) = get_patterns_from_file(
584 pattern_file,
585 root_dir,
586 inspect_pattern_bytes,
587 )?;
588
589 all_patterns.extend(patterns.to_owned());
590 all_warnings.extend(warnings);
591 }
592 let matcher = IncludeMatcher::new(all_patterns)?;
593 Ok((
594 Box::new(move |path: &HgPath| matcher.matches(path)),
595 all_warnings,
596 ))
597 }
598
599 impl<'a> IncludeMatcher<'a> {
new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self>600 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
601 let RootsDirsAndParents {
602 roots,
603 dirs,
604 parents,
605 } = roots_dirs_and_parents(&ignore_patterns)?;
606 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
607 PatternSyntax::Path | PatternSyntax::RelPath => true,
608 _ => false,
609 });
610 let (patterns, match_fn) = build_match(ignore_patterns)?;
611
612 Ok(Self {
613 patterns,
614 match_fn,
615 prefix,
616 roots,
617 dirs,
618 parents,
619 })
620 }
621
get_all_parents_children(&self) -> DirsChildrenMultiset622 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
623 // TODO cache
624 let thing = self
625 .dirs
626 .iter()
627 .chain(self.roots.iter())
628 .chain(self.parents.iter());
629 DirsChildrenMultiset::new(thing, Some(&self.parents))
630 }
631 }
632
633 impl<'a> Display for IncludeMatcher<'a> {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>634 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
635 // XXX What about exact matches?
636 // I'm not sure it's worth it to clone the HashSet and keep it
637 // around just in case someone wants to display the matcher, plus
638 // it's going to be unreadable after a few entries, but we need to
639 // inform in this display that exact matches are being used and are
640 // (on purpose) missing from the `includes`.
641 write!(
642 f,
643 "IncludeMatcher(includes='{}')",
644 String::from_utf8_lossy(&self.patterns.escaped_bytes())
645 )
646 }
647 }
648
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Glob patterns yield one root each (the part before the first glob
    /// character) and no dirs.
    #[test]
    fn test_roots_and_dirs() {
        let pats = vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ];
        let (roots, dirs) = roots_and_dirs(&pats);

        assert_eq!(
            roots,
            vec!(
                HgPathBuf::from_bytes(b"g/h"),
                HgPathBuf::from_bytes(b"g/h"),
                HgPathBuf::new()
            ),
        );
        assert_eq!(dirs, vec!());
    }

    /// Roots are deduplicated and the parents leading to them are listed.
    #[test]
    fn test_roots_dirs_and_parents() {
        let pats = vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ];

        let mut roots = HashSet::new();
        roots.insert(HgPathBuf::from_bytes(b"g/h"));
        roots.insert(HgPathBuf::new());

        let dirs = HashSet::new();

        let mut parents = HashSet::new();
        parents.insert(HgPathBuf::new());
        parents.insert(HgPathBuf::from_bytes(b"g"));

        assert_eq!(
            roots_dirs_and_parents(&pats).unwrap(),
            RootsDirsAndParents {
                roots,
                dirs,
                parents
            }
        );
    }

    /// `FileMatcher::visit_children_set` with a single, deeply nested file.
    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"dir"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(set)
        );

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"subdir"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(set)
        );

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"foo.txt"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Set(set)
        );

        // Anything that is not a directory leading to the file is empty
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    /// `FileMatcher::visit_children_set` when directories hold both files
    /// and subdirectories.
    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPathBuf::from_bytes(b"rootfile.txt"),
            HgPathBuf::from_bytes(b"a/file1.txt"),
            HgPathBuf::from_bytes(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"a"));
        set.insert(HgPath::new(b"rootfile.txt"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(set)
        );

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"b"));
        set.insert(HgPath::new(b"file1.txt"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a")),
            VisitChildrenSet::Set(set)
        );

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"c"));
        set.insert(HgPath::new(b"file2.txt"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b")),
            VisitChildrenSet::Set(set)
        );

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"d"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c")),
            VisitChildrenSet::Set(set)
        );
        let mut set = HashSet::new();
        set.insert(HgPath::new(b"file4.txt"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
            VisitChildrenSet::Set(set)
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    /// `IncludeMatcher::visit_children_set` for a `relpath`, a
    /// `rootfilesin` and a `glob` pattern in turn.
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap();

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"dir"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(set)
        );

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"subdir"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(set)
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RootFiles,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap();

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"dir"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(set)
        );

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"subdir"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(set)
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::Glob,
            b"dir/z*",
            Path::new(""),
        )])
        .unwrap();

        let mut set = HashSet::new();
        set.insert(HgPath::new(b"dir"));
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(set)
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
907