1 use super::regex::{Regex, Region};
2 use super::scope::*;
3 use super::syntax_definition::*;
4 use yaml_rust::{YamlLoader, Yaml, ScanError};
5 use yaml_rust::yaml::Hash;
6 use std::collections::HashMap;
7 use std::error::Error;
8 use std::fmt;
9 use std::path::Path;
10 use std::ops::DerefMut;
11 
/// Errors that can be produced while loading a `.sublime-syntax` definition from YAML.
#[derive(Debug)]
pub enum ParseSyntaxError {
    /// Invalid YAML file syntax, or at least something yaml_rust can't handle
    InvalidYaml(ScanError),
    /// The file must contain at least one YAML document
    EmptyFile,
    /// Some keys are required for something to be a valid `.sublime-syntax`.
    /// Carries the name of the key that was looked up.
    MissingMandatoryKey(&'static str),
    /// Invalid regex. Carries the regex string that failed to compile and the
    /// underlying compile error.
    RegexCompileError(String, Box<dyn Error + Send + Sync + 'static>),
    /// A scope that syntect's scope implementation can't handle
    InvalidScope(ParseScopeError),
    /// A reference to another file that is invalid
    BadFileRef,
    /// Syntaxes must have a context named "main"
    MainMissing,
    /// Some part of the YAML file is the wrong type (e.g. a string but should be a list).
    /// Sorry, this doesn't give you any way to narrow down where this is.
    /// Maybe use Sublime Text to figure it out.
    TypeMismatch,
}
33 
34 impl fmt::Display for ParseSyntaxError {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result35     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36         use crate::ParseSyntaxError::*;
37 
38         match *self {
39             InvalidYaml(_) => write!(f, "Invalid YAML file syntax"),
40             EmptyFile => write!(f, "Empty file"),
41             MissingMandatoryKey(_) => write!(f, "Missing mandatory key in YAML file"),
42             RegexCompileError(ref regex, ref error) =>
43                 write!(f, "Error while compiling regex '{}': {}",
44                        regex, error),
45             InvalidScope(_) => write!(f, "Invalid scope"),
46             BadFileRef => write!(f, "Invalid file reference"),
47             MainMissing => write!(f, "Context 'main' is missing"),
48             TypeMismatch => write!(f, "Type mismatch"),
49         }
50     }
51 }
52 
53 impl Error for ParseSyntaxError {
cause(&self) -> Option<&dyn Error>54     fn cause(&self) -> Option<&dyn Error> {
55         use crate::ParseSyntaxError::*;
56 
57         match self {
58             InvalidYaml(ref error) => Some(error),
59             RegexCompileError(_, error) => Some(error.as_ref()),
60             _ => None,
61         }
62     }
63 }
64 
get_key<'a, R, F: FnOnce(&'a Yaml) -> Option<R>>(map: &'a Hash, key: &'static str, f: F) -> Result<R, ParseSyntaxError>65 fn get_key<'a, R, F: FnOnce(&'a Yaml) -> Option<R>>(map: &'a Hash,
66                                                     key: &'static str,
67                                                     f: F)
68                                                     -> Result<R, ParseSyntaxError> {
69     map.get(&Yaml::String(key.to_owned()))
70         .ok_or_else(|| ParseSyntaxError::MissingMandatoryKey(key))
71         .and_then(|x| f(x).ok_or(ParseSyntaxError::TypeMismatch))
72 }
73 
str_to_scopes(s: &str, repo: &mut ScopeRepository) -> Result<Vec<Scope>, ParseSyntaxError>74 fn str_to_scopes(s: &str, repo: &mut ScopeRepository) -> Result<Vec<Scope>, ParseSyntaxError> {
75     s.split_whitespace()
76         .map(|scope| repo.build(scope).map_err(ParseSyntaxError::InvalidScope))
77         .collect()
78 }
79 
/// Mutable state threaded through the parsing functions of one syntax definition.
struct ParserState<'a> {
    /// Repository used to build every `Scope` encountered while parsing.
    scope_repo: &'a mut ScopeRepository,
    /// String-valued entries of the top-level `variables` mapping, by name.
    variables: HashMap<String, String>,
    /// Matches `{{name}}` variable references inside regexes.
    variable_regex: Regex,
    /// Matches a backslash followed by a digit (a backreference such as `\1`).
    backref_regex: Regex,
    /// Selects which newline rewrite is applied to regexes in `parse_regex`.
    lines_include_newline: bool,
}
87 
// YAML for the synthetic `__start` and `__main` contexts that wrap the real
// `main` context of every syntax.
//
// `__start` must not include prototypes from the actual syntax definition,
// otherwise it's possible that a prototype makes us pop out of `__start`.
static START_CONTEXT: &str = "
__start:
    - meta_include_prototype: false
    - match: ''
      push: __main
__main:
    - include: main
";
98 
99 impl SyntaxDefinition {
100     /// In case you want to create your own SyntaxDefinition's in memory from strings.
101     ///
102     /// Generally you should use a [`SyntaxSet`].
103     ///
104     /// `fallback_name` is an optional name to use when the YAML doesn't provide a `name` key.
105     ///
106     /// [`SyntaxSet`]: ../struct.SyntaxSet.html
load_from_str( s: &str, lines_include_newline: bool, fallback_name: Option<&str>, ) -> Result<SyntaxDefinition, ParseSyntaxError>107     pub fn load_from_str(
108         s: &str,
109         lines_include_newline: bool,
110         fallback_name: Option<&str>,
111     ) -> Result<SyntaxDefinition, ParseSyntaxError> {
112         let docs = match YamlLoader::load_from_str(s) {
113             Ok(x) => x,
114             Err(e) => return Err(ParseSyntaxError::InvalidYaml(e)),
115         };
116         if docs.is_empty() {
117             return Err(ParseSyntaxError::EmptyFile);
118         }
119         let doc = &docs[0];
120         let mut scope_repo = SCOPE_REPO.lock().unwrap();
121         SyntaxDefinition::parse_top_level(doc, scope_repo.deref_mut(), lines_include_newline, fallback_name)
122     }
123 
parse_top_level(doc: &Yaml, scope_repo: &mut ScopeRepository, lines_include_newline: bool, fallback_name: Option<&str>) -> Result<SyntaxDefinition, ParseSyntaxError>124     fn parse_top_level(doc: &Yaml,
125                        scope_repo: &mut ScopeRepository,
126                        lines_include_newline: bool,
127                        fallback_name: Option<&str>)
128                        -> Result<SyntaxDefinition, ParseSyntaxError> {
129         let h = doc.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
130 
131         let mut variables = HashMap::new();
132         if let Ok(map) = get_key(h, "variables", |x| x.as_hash()) {
133             for (key, value) in map.iter() {
134                 if let (Some(key_str), Some(val_str)) = (key.as_str(), value.as_str()) {
135                     variables.insert(key_str.to_owned(), val_str.to_owned());
136                 }
137             }
138         }
139         let contexts_hash = get_key(h, "contexts", |x| x.as_hash())?;
140         let top_level_scope = scope_repo.build(get_key(h, "scope", |x| x.as_str())?)
141             .map_err(ParseSyntaxError::InvalidScope)?;
142         let mut state = ParserState {
143             scope_repo,
144             variables,
145             variable_regex: Regex::new(r"\{\{([A-Za-z0-9_]+)\}\}".into()),
146             backref_regex: Regex::new(r"\\\d".into()),
147             lines_include_newline,
148         };
149 
150         let mut contexts = SyntaxDefinition::parse_contexts(contexts_hash, &mut state)?;
151         if !contexts.contains_key("main") {
152             return Err(ParseSyntaxError::MainMissing);
153         }
154 
155         SyntaxDefinition::add_initial_contexts(
156             &mut contexts,
157             &mut state,
158             top_level_scope,
159         );
160 
161         let defn = SyntaxDefinition {
162             name: get_key(h, "name", |x| x.as_str()).unwrap_or_else(|_| fallback_name.unwrap_or("Unnamed")).to_owned(),
163             scope: top_level_scope,
164             file_extensions: {
165                 get_key(h, "file_extensions", |x| x.as_vec())
166                     .map(|v| v.iter().filter_map(|y| y.as_str()).map(|x| x.to_owned()).collect())
167                     .unwrap_or_else(|_| Vec::new())
168             },
169             // TODO maybe cache a compiled version of this Regex
170             first_line_match: get_key(h, "first_line_match", |x| x.as_str())
171                 .ok()
172                 .map(|s| s.to_owned()),
173             hidden: get_key(h, "hidden", |x| x.as_bool()).unwrap_or(false),
174 
175             variables: state.variables.clone(),
176             contexts,
177         };
178         Ok(defn)
179     }
180 
parse_contexts(map: &Hash, state: &mut ParserState<'_>) -> Result<HashMap<String, Context>, ParseSyntaxError>181     fn parse_contexts(map: &Hash,
182                       state: &mut ParserState<'_>)
183                       -> Result<HashMap<String, Context>, ParseSyntaxError> {
184         let mut contexts = HashMap::new();
185         for (key, value) in map.iter() {
186             if let (Some(name), Some(val_vec)) = (key.as_str(), value.as_vec()) {
187                 let is_prototype = name == "prototype";
188                 let mut namer = ContextNamer::new(name);
189                 SyntaxDefinition::parse_context(val_vec, state, &mut contexts, is_prototype, &mut namer)?;
190             }
191         }
192 
193         Ok(contexts)
194     }
195 
parse_context(vec: &[Yaml], state: &mut ParserState<'_>, contexts: &mut HashMap<String, Context>, is_prototype: bool, namer: &mut ContextNamer) -> Result<String, ParseSyntaxError>196     fn parse_context(vec: &[Yaml],
197                      // TODO: Maybe just pass the scope repo if that's all that's needed?
198                      state: &mut ParserState<'_>,
199                      contexts: &mut HashMap<String, Context>,
200                      is_prototype: bool,
201                      namer: &mut ContextNamer)
202                      -> Result<String, ParseSyntaxError> {
203         let mut context = Context::new(!is_prototype);
204         let name = namer.next();
205 
206         for y in vec.iter() {
207             let map = y.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
208 
209             let mut is_special = false;
210             if let Ok(x) = get_key(map, "meta_scope", |x| x.as_str()) {
211                 context.meta_scope = str_to_scopes(x, state.scope_repo)?;
212                 is_special = true;
213             }
214             if let Ok(x) = get_key(map, "meta_content_scope", |x| x.as_str()) {
215                 context.meta_content_scope = str_to_scopes(x, state.scope_repo)?;
216                 is_special = true;
217             }
218             if let Ok(x) = get_key(map, "meta_include_prototype", |x| x.as_bool()) {
219                 context.meta_include_prototype = x;
220                 is_special = true;
221             }
222             if let Ok(true) = get_key(map, "clear_scopes", |x| x.as_bool()) {
223                 context.clear_scopes = Some(ClearAmount::All);
224                 is_special = true;
225             }
226             if let Ok(x) = get_key(map, "clear_scopes", |x| x.as_i64()) {
227                 context.clear_scopes = Some(ClearAmount::TopN(x as usize));
228                 is_special = true;
229             }
230             if !is_special {
231                 if let Ok(x) = get_key(map, "include", Some) {
232                     let reference = SyntaxDefinition::parse_reference(
233                         x, state, contexts, namer)?;
234                     context.patterns.push(Pattern::Include(reference));
235                 } else {
236                     let pattern = SyntaxDefinition::parse_match_pattern(
237                         map, state, contexts, namer)?;
238                     if pattern.has_captures {
239                         context.uses_backrefs = true;
240                     }
241                     context.patterns.push(Pattern::Match(pattern));
242                 }
243             }
244 
245         }
246 
247         contexts.insert(name.clone(), context);
248         Ok(name)
249     }
250 
parse_reference(y: &Yaml, state: &mut ParserState<'_>, contexts: &mut HashMap<String, Context>, namer: &mut ContextNamer) -> Result<ContextReference, ParseSyntaxError>251     fn parse_reference(y: &Yaml,
252                        state: &mut ParserState<'_>,
253                        contexts: &mut HashMap<String, Context>,
254                        namer: &mut ContextNamer)
255                        -> Result<ContextReference, ParseSyntaxError> {
256         if let Some(s) = y.as_str() {
257             let parts: Vec<&str> = s.split('#').collect();
258             let sub_context = if parts.len() > 1 {
259                 Some(parts[1].to_owned())
260             } else {
261                 None
262             };
263             if parts[0].starts_with("scope:") {
264                 Ok(ContextReference::ByScope {
265                     scope: state.scope_repo
266                         .build(&parts[0][6..])
267                         .map_err(ParseSyntaxError::InvalidScope)?,
268                     sub_context,
269                 })
270             } else if parts[0].ends_with(".sublime-syntax") {
271                 let stem = Path::new(parts[0])
272                     .file_stem()
273                     .and_then(|x| x.to_str())
274                     .ok_or(ParseSyntaxError::BadFileRef)?;
275                 Ok(ContextReference::File {
276                     name: stem.to_owned(),
277                     sub_context,
278                 })
279             } else {
280                 Ok(ContextReference::Named(parts[0].to_owned()))
281             }
282         } else if let Some(v) = y.as_vec() {
283             let subname = SyntaxDefinition::parse_context(v, state, contexts, false, namer)?;
284             Ok(ContextReference::Inline(subname))
285         } else {
286             Err(ParseSyntaxError::TypeMismatch)
287         }
288     }
289 
parse_match_pattern(map: &Hash, state: &mut ParserState<'_>, contexts: &mut HashMap<String, Context>, namer: &mut ContextNamer) -> Result<MatchPattern, ParseSyntaxError>290     fn parse_match_pattern(map: &Hash,
291                            state: &mut ParserState<'_>,
292                            contexts: &mut HashMap<String, Context>,
293                            namer: &mut ContextNamer)
294                            -> Result<MatchPattern, ParseSyntaxError> {
295         let raw_regex = get_key(map, "match", |x| x.as_str())?;
296         let regex_str = Self::parse_regex(raw_regex, state)?;
297         // println!("{:?}", regex_str);
298 
299         let scope = get_key(map, "scope", |x| x.as_str())
300             .ok()
301             .map(|s| str_to_scopes(s, state.scope_repo))
302             .unwrap_or_else(|| Ok(vec![]))?;
303 
304         let captures = if let Ok(map) = get_key(map, "captures", |x| x.as_hash()) {
305             Some(Self::parse_captures(map, &regex_str, state)?)
306         } else {
307             None
308         };
309 
310         let mut has_captures = false;
311         let operation = if get_key(map, "pop", Some).is_ok() {
312             // Thanks @wbond for letting me know this is the correct way to check for captures
313             has_captures = state.backref_regex.search(&regex_str, 0, regex_str.len(), None);
314             MatchOperation::Pop
315         } else if let Ok(y) = get_key(map, "push", Some) {
316             MatchOperation::Push(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
317         } else if let Ok(y) = get_key(map, "set", Some) {
318             MatchOperation::Set(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
319         } else if let Ok(y) = get_key(map, "embed", Some) {
320             // Same as push so we translate it to what it would be
321             let mut embed_escape_context_yaml = vec!();
322             let mut commands = Hash::new();
323             commands.insert(Yaml::String("meta_include_prototype".to_string()), Yaml::Boolean(false));
324             embed_escape_context_yaml.push(Yaml::Hash(commands));
325             if let Ok(s) = get_key(map, "embed_scope", Some) {
326                 commands = Hash::new();
327                 commands.insert(Yaml::String("meta_content_scope".to_string()), s.clone());
328                 embed_escape_context_yaml.push(Yaml::Hash(commands));
329             }
330             if let Ok(v) = get_key(map, "escape", Some) {
331                 let mut match_map = Hash::new();
332                 match_map.insert(Yaml::String("match".to_string()), v.clone());
333                 match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
334                 if let Ok(y) = get_key(map, "escape_captures", Some) {
335                     match_map.insert(Yaml::String("captures".to_string()), y.clone());
336                 }
337                 embed_escape_context_yaml.push(Yaml::Hash(match_map));
338                 let escape_context = SyntaxDefinition::parse_context(
339                     &embed_escape_context_yaml,
340                     state,
341                     contexts,
342                     false,
343                     namer,
344                 )?;
345                 MatchOperation::Push(vec![ContextReference::Inline(escape_context),
346                                           SyntaxDefinition::parse_reference(y, state, contexts, namer)?])
347             } else {
348                 return Err(ParseSyntaxError::MissingMandatoryKey("escape"));
349             }
350 
351         } else {
352             MatchOperation::None
353         };
354 
355         let with_prototype = if let Ok(v) = get_key(map, "with_prototype", |x| x.as_vec()) {
356             // should a with_prototype include the prototype? I don't think so.
357             let subname = Self::parse_context(v, state, contexts, true, namer)?;
358             Some(ContextReference::Inline(subname))
359         } else if let Ok(v) = get_key(map, "escape", Some) {
360             let subname = namer.next();
361 
362             let mut context = Context::new(false);
363             let mut match_map = Hash::new();
364             match_map.insert(Yaml::String("match".to_string()), Yaml::String(format!("(?={})", v.as_str().unwrap())));
365             match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
366             let pattern = SyntaxDefinition::parse_match_pattern(&match_map, state, contexts, namer)?;
367             if pattern.has_captures {
368                 context.uses_backrefs = true;
369             }
370             context.patterns.push(Pattern::Match(pattern));
371 
372             contexts.insert(subname.clone(), context);
373             Some(ContextReference::Inline(subname))
374         } else {
375             None
376         };
377 
378         let pattern = MatchPattern::new(
379             has_captures,
380             regex_str,
381             scope,
382             captures,
383             operation,
384             with_prototype,
385         );
386 
387         Ok(pattern)
388     }
389 
parse_pushargs(y: &Yaml, state: &mut ParserState<'_>, contexts: &mut HashMap<String, Context>, namer: &mut ContextNamer) -> Result<Vec<ContextReference>, ParseSyntaxError>390     fn parse_pushargs(y: &Yaml,
391                       state: &mut ParserState<'_>,
392                       contexts: &mut HashMap<String, Context>,
393                       namer: &mut ContextNamer)
394                       -> Result<Vec<ContextReference>, ParseSyntaxError> {
395         // check for a push of multiple items
396         if y.as_vec().map_or(false, |v| !v.is_empty() && (v[0].as_str().is_some() || (v[0].as_vec().is_some() && v[0].as_vec().unwrap()[0].as_hash().is_some()))) {
397             // this works because Result implements FromIterator to handle the errors
398             y.as_vec()
399                 .unwrap()
400                 .iter()
401                 .map(|x| SyntaxDefinition::parse_reference(x, state, contexts, namer))
402                 .collect()
403         } else {
404             let reference = SyntaxDefinition::parse_reference(y, state, contexts, namer)?;
405             Ok(vec![reference])
406         }
407     }
408 
parse_regex(raw_regex: &str, state: &ParserState<'_>) -> Result<String, ParseSyntaxError>409     fn parse_regex(raw_regex: &str, state: &ParserState<'_>) -> Result<String, ParseSyntaxError> {
410         let regex = Self::resolve_variables(raw_regex, state);
411         let regex = replace_posix_char_classes(regex);
412         let regex = if state.lines_include_newline {
413             regex_for_newlines(regex)
414         } else {
415             // If the passed in strings don't include newlines (unlike Sublime) we can't match on
416             // them using the original regex. So this tries to rewrite the regex in a way that
417             // allows matching against lines without newlines (essentially replacing `\n` with `$`).
418             regex_for_no_newlines(regex)
419         };
420         Self::try_compile_regex(&regex)?;
421         Ok(regex)
422     }
423 
resolve_variables(raw_regex: &str, state: &ParserState<'_>) -> String424     fn resolve_variables(raw_regex: &str, state: &ParserState<'_>) -> String {
425         let mut result = String::new();
426         let mut index = 0;
427         let mut region = Region::new();
428         while state.variable_regex.search(raw_regex, index, raw_regex.len(), Some(&mut region)) {
429             let (begin, end) = region.pos(0).unwrap();
430 
431             result.push_str(&raw_regex[index..begin]);
432 
433             let var_pos = region.pos(1).unwrap();
434             let var_name = &raw_regex[var_pos.0..var_pos.1];
435             let var_raw = state.variables.get(var_name).map(String::as_ref).unwrap_or("");
436             let var_resolved = Self::resolve_variables(var_raw, state);
437             result.push_str(&var_resolved);
438 
439             index = end;
440         }
441         if index < raw_regex.len() {
442             result.push_str(&raw_regex[index..]);
443         }
444         result
445     }
446 
try_compile_regex(regex_str: &str) -> Result<(), ParseSyntaxError>447     fn try_compile_regex(regex_str: &str) -> Result<(), ParseSyntaxError> {
448         // Replace backreferences with a placeholder value that will also appear in errors
449         let regex_str = substitute_backrefs_in_regex(regex_str, |i| Some(format!("<placeholder_{}>", i)));
450 
451         if let Some(error) = Regex::try_compile(&regex_str) {
452             Err(ParseSyntaxError::RegexCompileError(regex_str, error))
453         } else {
454             Ok(())
455         }
456     }
457 
parse_captures( map: &Hash, regex_str: &str, state: &mut ParserState<'_>, ) -> Result<CaptureMapping, ParseSyntaxError>458     fn parse_captures(
459         map: &Hash,
460         regex_str: &str,
461         state: &mut ParserState<'_>,
462     ) -> Result<CaptureMapping, ParseSyntaxError> {
463         let valid_indexes = get_consuming_capture_indexes(regex_str);
464         let mut captures = Vec::new();
465         for (key, value) in map.iter() {
466             if let (Some(key_int), Some(val_str)) = (key.as_i64(), value.as_str()) {
467                 if valid_indexes.contains(&(key_int as usize)) {
468                     captures.push((key_int as usize, str_to_scopes(val_str, state.scope_repo)?));
469                 }
470             }
471         }
472         Ok(captures)
473     }
474 
475     /// Sublime treats the top level context slightly differently from
476     /// including the main context from other syntaxes. When main is popped
477     /// it is immediately re-added and when it is `set` over the file level
478     /// scope remains. This behaviour is emulated through some added contexts
479     /// that are the actual top level contexts used in parsing.
480     /// See https://github.com/trishume/syntect/issues/58 for more.
add_initial_contexts( contexts: &mut HashMap<String, Context>, state: &mut ParserState<'_>, top_level_scope: Scope, )481     fn add_initial_contexts(
482         contexts: &mut HashMap<String, Context>,
483         state: &mut ParserState<'_>,
484         top_level_scope: Scope,
485     ) {
486         let yaml_docs = YamlLoader::load_from_str(START_CONTEXT).unwrap();
487         let yaml = &yaml_docs[0];
488 
489         let start_yaml : &[Yaml] = yaml["__start"].as_vec().unwrap();
490         SyntaxDefinition::parse_context(start_yaml, state, contexts, false, &mut ContextNamer::new("__start")).unwrap();
491         if let Some(start) = contexts.get_mut("__start") {
492             start.meta_content_scope = vec![top_level_scope];
493         }
494 
495         let main_yaml : &[Yaml] = yaml["__main"].as_vec().unwrap();
496         SyntaxDefinition::parse_context(main_yaml, state, contexts, false, &mut ContextNamer::new("__main")).unwrap();
497 
498         let meta_include_prototype = contexts["main"].meta_include_prototype;
499         let meta_scope = contexts["main"].meta_scope.clone();
500         let meta_content_scope = contexts["main"].meta_content_scope.clone();
501 
502         if let Some(outer_main) = contexts.get_mut("__main") {
503             outer_main.meta_include_prototype = meta_include_prototype;
504             outer_main.meta_scope = meta_scope;
505             outer_main.meta_content_scope = meta_content_scope;
506         }
507 
508         // add the top_level_scope as a meta_content_scope to main so
509         // pushes from other syntaxes add the file scope
510         // TODO: this order is not quite correct if main also has a meta_scope
511         if let Some(main) = contexts.get_mut("main") {
512             main.meta_content_scope.insert(0, top_level_scope);
513         }
514     }
515 }
516 
/// Hands out names for a named context and the anonymous contexts nested inside it.
struct ContextNamer {
    /// Name of the enclosing named context.
    name: String,
    /// Index the next anonymous name will use; `None` until the base name is handed out.
    anonymous_index: Option<usize>,
}

impl ContextNamer {
    /// Creates a namer whose first name will be `name` itself.
    fn new(name: &str) -> ContextNamer {
        ContextNamer {
            name: name.to_owned(),
            anonymous_index: None,
        }
    }

    /// Returns the base name on the first call, then `#anon_<name>_0`,
    /// `#anon_<name>_1`, ... on subsequent calls.
    fn next(&mut self) -> String {
        match self.anonymous_index {
            None => {
                self.anonymous_index = Some(0);
                self.name.clone()
            }
            Some(index) => {
                self.anonymous_index = Some(index + 1);
                format!("#anon_{}_{}", self.name, index)
            }
        }
    }
}
541 
/// Rewrites POSIX character classes to Unicode character classes.
///
/// In fancy-regex, POSIX character classes only match ASCII characters, while
/// Sublime's syntaxes expect them to match Unicode characters as well. Only
/// `[:alpha:]`, `[:alnum:]`, `[:lower:]`, `[:upper:]` and `[:digit:]` are rewritten.
fn replace_posix_char_classes(regex: String) -> String {
    const REPLACEMENTS: [(&str, &str); 5] = [
        ("[:alpha:]", r"\p{L}"),
        ("[:alnum:]", r"\p{L}\p{N}"),
        ("[:lower:]", r"\p{Ll}"),
        ("[:upper:]", r"\p{Lu}"),
        ("[:digit:]", r"\p{Nd}"),
    ];

    REPLACEMENTS
        .iter()
        .fold(regex, |text, &(posix, unicode)| text.replace(posix, unicode))
}
553 
554 
555 /// Some of the regexes include `$` and expect it to match end of line,
556 /// e.g. *before* the `\n` in `test\n`.
557 ///
558 /// In fancy-regex, `$` means end of text by default, so that would
559 /// match *after* `\n`. Using `(?m:$)` instead means it matches end of line.
560 ///
561 /// Note that we don't want to add a `(?m)` in the beginning to change the
562 /// whole regex because that would also change the meaning of `^`. In
563 /// fancy-regex, that also matches at the end of e.g. `test\n` which is
564 /// different from onig. It would also change `.` to match more.
regex_for_newlines(regex: String) -> String565 fn regex_for_newlines(regex: String) -> String {
566     if !regex.contains('$') {
567         return regex;
568     }
569 
570     let rewriter = RegexRewriterForNewlines {
571         parser: Parser::new(regex.as_bytes()),
572     };
573     rewriter.rewrite()
574 }
575 
576 struct RegexRewriterForNewlines<'a> {
577     parser: Parser<'a>,
578 }
579 
580 impl<'a> RegexRewriterForNewlines<'a> {
rewrite(mut self) -> String581     fn rewrite(mut self) -> String {
582         let mut result = Vec::new();
583 
584         while let Some(c) = self.parser.peek() {
585             match c {
586                 b'$' => {
587                     self.parser.next();
588                     result.extend_from_slice(br"(?m:$)");
589                 }
590                 b'\\' => {
591                     self.parser.next();
592                     result.push(c);
593                     if let Some(c2) = self.parser.peek() {
594                         self.parser.next();
595                         result.push(c2);
596                     }
597                 }
598                 b'[' => {
599                     let (mut content, _) = self.parser.parse_character_class();
600                     result.append(&mut content);
601                 }
602                 _ => {
603                     self.parser.next();
604                     result.push(c);
605                 }
606             }
607         }
608         String::from_utf8(result).unwrap()
609     }
610 }
611 
612 /// Rewrite a regex that matches `\n` to one that matches `$` (end of line) instead.
613 /// That allows the regex to be used to match lines that don't include a trailing newline character.
614 ///
615 /// The reason we're doing this is because the regexes in the syntax definitions assume that the
616 /// lines that are being matched on include a trailing newline.
617 ///
618 /// Note that the rewrite is just an approximation and there's a couple of cases it can not handle,
619 /// due to `$` being an anchor whereas `\n` matches a character.
regex_for_no_newlines(regex: String) -> String620 fn regex_for_no_newlines(regex: String) -> String {
621     if !regex.contains(r"\n") {
622         return regex;
623     }
624 
625     // A special fix to rewrite a pattern from the `Rd` syntax that the RegexRewriter can not
626     // handle properly.
627     let regex = regex.replace("(?:\\n)?", "(?:$|)");
628 
629     let rewriter = RegexRewriterForNoNewlines {
630         parser: Parser::new(regex.as_bytes()),
631     };
632     rewriter.rewrite()
633 }
634 
635 struct RegexRewriterForNoNewlines<'a> {
636     parser: Parser<'a>,
637 }
638 
639 impl<'a> RegexRewriterForNoNewlines<'a> {
rewrite(mut self) -> String640     fn rewrite(mut self) -> String {
641         let mut result = Vec::new();
642         while let Some(c) = self.parser.peek() {
643             match c {
644                 b'\\' => {
645                     self.parser.next();
646                     if let Some(c2) = self.parser.peek() {
647                         self.parser.next();
648                         // Replacing `\n` with `$` in `\n?` or `\n+` would make parsing later fail
649                         // with "target of repeat operator is invalid"
650                         let c3 = self.parser.peek();
651                         if c2 == b'n' && c3 != Some(b'?') && c3 != Some(b'+') && c3 != Some(b'*') {
652                             result.extend_from_slice(b"$");
653                         } else {
654                             result.push(c);
655                             result.push(c2);
656                         }
657                     } else {
658                         result.push(c);
659                     }
660                 }
661                 b'[' => {
662                     let (mut content, matches_newline) = self.parser.parse_character_class();
663                     if matches_newline && self.parser.peek() != Some(b'?') {
664                         result.extend_from_slice(b"(?:");
665                         result.append(&mut content);
666                         result.extend_from_slice(br"|$)");
667                     } else {
668                         result.append(&mut content);
669                     }
670                 }
671                 _ => {
672                     self.parser.next();
673                     result.push(c);
674                 }
675             }
676         }
677         String::from_utf8(result).unwrap()
678     }
679 }
680 
get_consuming_capture_indexes(regex: &str) -> Vec<usize>681 fn get_consuming_capture_indexes(regex: &str) -> Vec<usize> {
682     let parser = ConsumingCaptureIndexParser {
683         parser: Parser::new(regex.as_bytes()),
684     };
685     parser.get_consuming_capture_indexes()
686 }
687 
688 struct ConsumingCaptureIndexParser<'a> {
689     parser: Parser<'a>,
690 }
691 
692 impl<'a> ConsumingCaptureIndexParser<'a> {
693     /// Find capture groups which are not inside lookarounds.
694     ///
695     /// If, in a YAML syntax definition, a scope stack is applied to a capture group inside a
696     /// lookaround, (i.e. "captures:\n x: scope.stack goes.here", where "x" is the number of a
697     /// capture group in a lookahead/behind), those those scopes are not applied, so no need to
698     /// even parse them.
get_consuming_capture_indexes(mut self) -> Vec<usize>699     fn get_consuming_capture_indexes(mut self) -> Vec<usize> {
700         let mut result = Vec::new();
701         let mut stack = Vec::new();
702         let mut cap_num = 0;
703         let mut in_lookaround = false;
704         stack.push(in_lookaround);
705         result.push(cap_num);
706 
707         while let Some(c) = self.parser.peek() {
708             match c {
709                 b'\\' => {
710                     self.parser.next();
711                     self.parser.next();
712                 }
713                 b'[' => {
714                     self.parser.parse_character_class();
715                 }
716                 b'(' => {
717                     self.parser.next();
718                     // add the current lookaround state to the stack so we can just pop at a closing paren
719                     stack.push(in_lookaround);
720                     if let Some(c2) = self.parser.peek() {
721                         if c2 != b'?' {
722                             // simple numbered capture group
723                             cap_num += 1;
724                             // if we are not currently in a lookaround,
725                             // add this capture group number to the valid ones
726                             if !in_lookaround {
727                                 result.push(cap_num);
728                             }
729                         } else {
730                             self.parser.next();
731                             if let Some(c3) = self.parser.peek() {
732                                 self.parser.next();
733                                 if c3 == b'=' || c3 == b'!' {
734                                     // lookahead
735                                     in_lookaround = true;
736                                 } else if c3 == b'<' {
737                                     if let Some(c4) = self.parser.peek() {
738                                         if c4 == b'=' || c4 == b'!' {
739                                             self.parser.next();
740                                             // lookbehind
741                                             in_lookaround = true;
742                                         }
743                                     }
744                                 } else if c3 == b'P' {
745                                     if let Some(c4) = self.parser.peek() {
746                                         if c4 == b'<' {
747                                             // named capture group
748                                             cap_num += 1;
749                                             // if we are not currently in a lookaround,
750                                             // add this capture group number to the valid ones
751                                             if !in_lookaround {
752                                                 result.push(cap_num);
753                                             }
754                                         }
755                                     }
756                                 }
757                             }
758                         }
759                     }
760                 }
761                 b')' => {
762                     if let Some(value) = stack.pop() {
763                         in_lookaround = value;
764                     }
765                     self.parser.next();
766                 }
767                 _ => {
768                     self.parser.next();
769                 }
770             }
771         }
772         result
773     }
774 }
775 
/// A tiny cursor over the bytes of a regex.
///
/// It only offers single-byte lookahead, advancing, and skipping over a whole character class.
struct Parser<'a> {
    bytes: &'a [u8],
    index: usize,
}

impl<'a> Parser<'a> {
    /// Creates a parser positioned at the first byte of `bytes`.
    fn new(bytes: &[u8]) -> Parser {
        Parser { bytes, index: 0 }
    }

    /// Returns the byte at the current position without consuming it, or `None` once the
    /// position is past the end of the input.
    fn peek(&self) -> Option<u8> {
        self.bytes.get(self.index).copied()
    }

    /// Moves one byte forward. Moving past the end is allowed; `peek` then returns `None`.
    fn next(&mut self) {
        self.index += 1;
    }

    /// Consumes a character class, assuming the current byte is its opening `[`.
    ///
    /// Returns the bytes of the class exactly as they were read (including the brackets) and
    /// whether the class matches a newline. The latter is only reported for an escaped `\n`
    /// that sits directly inside a non-negated class; nested classes are not analysed.
    /// If the input ends before the class is closed, everything up to the end is returned.
    fn parse_character_class(&mut self) -> (Vec<u8>, bool) {
        let mut content = vec![b'['];
        self.next();

        let negated = self.peek() == Some(b'^');
        if negated {
            self.next();
            content.push(b'^');
        }

        // An unescaped `]` is allowed after `[` or `[^` and doesn't mean the end of the class.
        if self.peek() == Some(b']') {
            self.next();
            content.push(b']');
        }

        let mut depth = 0;
        let mut matches_newline = false;
        while let Some(byte) = self.peek() {
            // Every byte of the class is consumed and copied, whatever it means.
            self.next();
            content.push(byte);

            match byte {
                b'\\' => {
                    if let Some(escaped) = self.peek() {
                        self.next();
                        content.push(escaped);
                        if escaped == b'n' && !negated && depth == 0 {
                            matches_newline = true;
                        }
                    }
                }
                b'[' => depth += 1,
                b']' => {
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        (content, matches_newline)
    }
}
853 
854 
#[cfg(test)]
mod tests {
    use crate::parsing::syntax_definition::*;
    use crate::parsing::Scope;
    use super::*;

    /// Loads a minimal and a fuller syntax definition and checks the parsed fields.
    #[test]
    fn can_parse() {
        let defn: SyntaxDefinition =
            SyntaxDefinition::load_from_str("name: C\nscope: source.c\ncontexts: {main: []}",
                                            false, None)
                .unwrap();
        assert_eq!(defn.name, "C");
        assert_eq!(defn.scope, Scope::new("source.c").unwrap());
        let exts_empty: Vec<String> = Vec::new();
        assert_eq!(defn.file_extensions, exts_empty);
        assert!(!defn.hidden);
        assert!(defn.variables.is_empty());
        let defn2: SyntaxDefinition =
            SyntaxDefinition::load_from_str("
        name: C
        scope: source.c
        file_extensions: [c, h]
        hidden: true
        variables:
          ident: '[QY]+'
        contexts:
          prototype:
            - match: lol
              scope: source.php
          main:
            - match: \\b(if|else|for|while|{{ident}})\\b
              scope: keyword.control.c keyword.looping.c
              captures:
                  1: meta.preprocessor.c++
                  2: keyword.control.include.c++
              push: [string, 'scope:source.c#main', 'CSS.sublime-syntax#rule-list-body']
              with_prototype:
                - match: wow
                  pop: true
            - match: '\"'
              push: string
          string:
            - meta_scope: string.quoted.double.c
            - meta_include_prototype: false
            - match: \\\\.
              scope: constant.character.escape.c
            - match: '\"'
              pop: true
        ",
                                            false, None)
                .unwrap();
        assert_eq!(defn2.name, "C");
        let top_level_scope = Scope::new("source.c").unwrap();
        assert_eq!(defn2.scope, top_level_scope);
        let exts: Vec<String> = vec![String::from("c"), String::from("h")];
        assert_eq!(defn2.file_extensions, exts);
        assert!(defn2.hidden);
        assert_eq!(defn2.variables.get("ident").unwrap(), "[QY]+");

        let n: Vec<Scope> = Vec::new();
        let main = &defn2.contexts["main"];
        assert_eq!(main.meta_content_scope, vec![top_level_scope]);
        assert_eq!(main.meta_scope, n);
        assert!(main.meta_include_prototype);

        assert_eq!(defn2.contexts["__main"].meta_content_scope, n);
        assert_eq!(defn2.contexts["__start"].meta_content_scope, vec![top_level_scope]);

        assert_eq!(defn2.contexts["string"].meta_scope,
                   vec![Scope::new("string.quoted.double.c").unwrap()]);
        let first_pattern: &Pattern = &main.patterns[0];
        match first_pattern {
            Pattern::Match(match_pat) => {
                use crate::parsing::syntax_definition::ContextReference::*;

                let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
                assert_eq!(&m[0], &(1,vec![Scope::new("meta.preprocessor.c++").unwrap()]));

                // this is sadly necessary because Context is not Eq because of the Regex
                let expected = MatchOperation::Push(vec![
                    Named("string".to_owned()),
                    ByScope { scope: Scope::new("source.c").unwrap(), sub_context: Some("main".to_owned()) },
                    File {
                        name: "CSS".to_owned(),
                        sub_context: Some("rule-list-body".to_owned())
                    },
                ]);
                assert_eq!(format!("{:?}", match_pat.operation),
                           format!("{:?}", expected));

                assert_eq!(match_pat.scope,
                           vec![Scope::new("keyword.control.c").unwrap(),
                                Scope::new("keyword.looping.c").unwrap()]);

                assert!(match_pat.with_prototype.is_some());
            }
            _ => panic!("expected the first pattern of `main` to be a match pattern"),
        }
    }

    /// `embed`/`embed_scope`/`escape` must load to the same context as the equivalent
    /// hand-written `push` + `with_prototype` form.
    #[test]
    fn can_parse_embed_as_with_prototypes() {
        let old_def = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              push:
                - [{ meta_include_prototype: false }, { meta_content_scope: 'source.css.embedded.html' }, { match: '(?i)(?=</style)', pop: true }]
                - scope:source.css
              with_prototype:
                - match: (?=(?i)(?=</style))
                  pop: true
        "#,false, None).unwrap();

        let def_with_embed = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              embed: scope:source.css
              embed_scope: source.css.embedded.html
              escape: (?i)(?=</style)
        "#,false, None).unwrap();

        assert_eq!(old_def.contexts["main"], def_with_embed.contexts["main"]);
    }

    /// An `embed` without an `escape` key must be rejected with `MissingMandatoryKey("escape")`.
    #[test]
    fn errors_on_embed_without_escape() {
        let def = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              embed: scope:source.css
              embed_scope: source.css.embedded.html
        "#,false, None);
        assert!(def.is_err());
        match def.unwrap_err() {
            ParseSyntaxError::MissingMandatoryKey(key) => assert_eq!(key, "escape"),
            other => panic!("Got unexpected ParseSyntaxError: {:?}", other),
        }
    }

    /// An invalid regex must be reported as `RegexCompileError` carrying the offending pattern.
    #[test]
    fn errors_on_regex_compile_error() {
        let def = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [test]
        contexts:
          main:
            - match: '[a'
              scope: keyword.name
        "#,false, None);
        assert!(def.is_err());
        match def.unwrap_err() {
            ParseSyntaxError::RegexCompileError(ref regex, _) => assert_eq!("[a", regex),
            other => panic!("Got unexpected ParseSyntaxError: {:?}", other),
        }
    }

    /// Inline (flow-style) contexts inside a `push` list must load without error.
    #[test]
    fn can_parse_ugly_yaml() {
        let defn: SyntaxDefinition =
            SyntaxDefinition::load_from_str("
        name: LaTeX
        scope: text.tex.latex
        contexts:
          main:
            - match: '((\\\\)(?:framebox|makebox))\\b'
              captures:
                1: support.function.box.latex
                2: punctuation.definition.backslash.latex
              push:
                - [{meta_scope: meta.function.box.latex}, {match: '', pop: true}]
                - argument
                - optional-arguments
          argument:
            - match: '\\{'
              scope: punctuation.definition.group.brace.begin.latex
            - match: '(?=\\S)'
              pop: true
          optional-arguments:
            - match: '(?=\\S)'
              pop: true
        ",
                                            false, None)
                .unwrap();
        assert_eq!(defn.name, "LaTeX");
        let top_level_scope = Scope::new("text.tex.latex").unwrap();
        assert_eq!(defn.scope, top_level_scope);

        let first_pattern: &Pattern = &defn.contexts["main"].patterns[0];
        match first_pattern {
            Pattern::Match(match_pat) => {
                let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
                assert_eq!(&m[0], &(1,vec![Scope::new("support.function.box.latex").unwrap()]));

                // TODO: check the first pushed reference is Inline(...) and has a meta_scope of meta.function.box.latex
                // TODO: check the second pushed reference is Named("argument".to_owned())
                // TODO: check the third pushed reference is Named("optional-arguments".to_owned())

                assert!(match_pat.with_prototype.is_none());
            }
            _ => panic!("expected the first pattern of `main` to be a match pattern"),
        }
    }

    /// Anonymous contexts are stored under `#anon_<context>_<n>` names, numbered in the order
    /// asserted below.
    #[test]
    fn names_anonymous_contexts() {
        let def = SyntaxDefinition::load_from_str(
            r#"
            scope: source.c
            contexts:
              main:
                - match: a
                  push: a
              a:
                - meta_scope: a
                - match: x
                  push:
                    - meta_scope: anonymous_x
                    - match: anything
                      push:
                        - meta_scope: anonymous_x_2
                - match: y
                  push:
                    - meta_scope: anonymous_y
                - match: z
                  escape: 'test'
            "#,
            false,
            None
        ).unwrap();

        assert_eq!(def.contexts["a"].meta_scope, vec![Scope::new("a").unwrap()]);
        assert_eq!(def.contexts["#anon_a_0"].meta_scope, vec![Scope::new("anonymous_x").unwrap()]);
        assert_eq!(def.contexts["#anon_a_1"].meta_scope, vec![Scope::new("anonymous_x_2").unwrap()]);
        assert_eq!(def.contexts["#anon_a_2"].meta_scope, vec![Scope::new("anonymous_y").unwrap()]);
        assert_eq!(def.contexts["#anon_a_3"].patterns.len(), 1); // escape
    }

    /// A definition without a `name` key takes the fallback name passed by the caller.
    #[test]
    fn can_use_fallback_name() {
        let def = SyntaxDefinition::load_from_str(r#"
        scope: source.c
        contexts:
          main:
            - match: ''
        "#,false, Some("C"));
        assert_eq!(def.unwrap().name, "C");
    }

    /// In "lines include newlines" mode only an unescaped `$` outside a character class is
    /// rewritten.
    #[test]
    fn can_rewrite_regex_for_newlines() {
        fn rewrite(s: &str) -> String {
            regex_for_newlines(s.to_string())
        }

        assert_eq!(&rewrite(r"a"), r"a");
        assert_eq!(&rewrite(r"\b"), r"\b");
        assert_eq!(&rewrite(r"(a)"), r"(a)");
        assert_eq!(&rewrite(r"[a]"), r"[a]");
        assert_eq!(&rewrite(r"[^a]"), r"[^a]");
        assert_eq!(&rewrite(r"[]a]"), r"[]a]");
        assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");

        assert_eq!(&rewrite(r"^"), r"^");
        assert_eq!(&rewrite(r"$"), r"(?m:$)");
        assert_eq!(&rewrite(r"^ab$"), r"^ab(?m:$)");
        assert_eq!(&rewrite(r"\^ab\$"), r"\^ab\$");
        assert_eq!(&rewrite(r"(//).*$"), r"(//).*(?m:$)");

        // Do not rewrite this `$` because it's in a char class and doesn't mean end of line
        assert_eq!(&rewrite(r"[a$]"), r"[a$]");
    }

    /// In "lines without newlines" mode `\n` is replaced by `$` wherever that keeps the regex
    /// valid.
    #[test]
    fn can_rewrite_regex_for_no_newlines() {
        fn rewrite(s: &str) -> String {
            regex_for_no_newlines(s.to_string())
        }

        assert_eq!(&rewrite(r"a"), r"a");
        assert_eq!(&rewrite(r"\b"), r"\b");
        assert_eq!(&rewrite(r"(a)"), r"(a)");
        assert_eq!(&rewrite(r"[a]"), r"[a]");
        assert_eq!(&rewrite(r"[^a]"), r"[^a]");
        assert_eq!(&rewrite(r"[]a]"), r"[]a]");
        assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");

        assert_eq!(&rewrite(r"\n"), r"$");
        assert_eq!(&rewrite(r"\[\n"), r"\[$");
        assert_eq!(&rewrite(r"a\n?"), r"a\n?");
        assert_eq!(&rewrite(r"a\n+"), r"a\n+");
        assert_eq!(&rewrite(r"a\n*"), r"a\n*");
        assert_eq!(&rewrite(r"[abc\n]"), r"(?:[abc\n]|$)");
        assert_eq!(&rewrite(r"[^\n]"), r"[^\n]");
        assert_eq!(&rewrite(r"[^]\n]"), r"[^]\n]");
        assert_eq!(&rewrite(r"[\n]?"), r"[\n]?");
        // Removing the `\n` might result in an empty character class, so we should leave it.
        assert_eq!(&rewrite(r"[\n]"), r"(?:[\n]|$)");
        assert_eq!(&rewrite(r"[]\n]"), r"(?:[]\n]|$)");
        // In order to properly understand nesting, we'd have to have a full parser, so ignore it.
        assert_eq!(&rewrite(r"[[a]&&[\n]]"), r"[[a]&&[\n]]");

        assert_eq!(&rewrite(r"ab(?:\n)?"), r"ab(?:$|)");
        assert_eq!(&rewrite(r"(?<!\n)ab"), r"(?<!$)ab");
        assert_eq!(&rewrite(r"(?<=\n)ab"), r"(?<=$)ab");
    }

    /// Groups inside a lookahead are skipped; named groups outside of one are kept.
    #[test]
    fn can_get_valid_captures_from_regex() {
        let regex = "hello(test)(?=(world))(foo(?P<named>bar))";
        let valid_indexes = get_consuming_capture_indexes(regex);
        assert_eq!(valid_indexes, [0, 1, 3, 4]);
    }

    /// Something that looks like a lookahead inside a character class must be ignored.
    #[test]
    fn can_get_valid_captures_from_regex2() {
        let regex = "hello(test)[(?=tricked](foo(bar))";
        let valid_indexes = get_consuming_capture_indexes(regex);
        assert_eq!(valid_indexes, [0, 1, 2, 3]);
    }

    /// Every group nested anywhere below a lookaround is skipped.
    #[test]
    fn can_get_valid_captures_from_nested_regex() {
        let regex = "hello(test)(?=(world(?!(te(?<=(st))))))(foo(bar))";
        let valid_indexes = get_consuming_capture_indexes(regex);
        assert_eq!(valid_indexes, [0, 1, 5, 6]);
    }
}
1216