1 use super::syntax_definition::*;
2 use super::scope::*;
3 
4 #[cfg(feature = "metadata")]
5 use super::metadata::{LoadMetadata, Metadata, RawMetadataEntry};
6 
7 #[cfg(feature = "yaml-load")]
8 use super::super::LoadingError;
9 
10 use std::collections::{HashMap, HashSet, BTreeSet};
11 use std::path::Path;
12 #[cfg(feature = "yaml-load")]
13 use walkdir::WalkDir;
14 #[cfg(feature = "yaml-load")]
15 use std::io::Read;
16 use std::io::{self, BufRead, BufReader};
17 use std::fs::File;
18 use std::mem;
19 
20 use lazycell::AtomicLazyCell;
21 use super::regex::Regex;
22 use crate::parsing::syntax_definition::ContextId;
23 
24 /// A syntax set holds multiple syntaxes that have been linked together.
25 ///
26 /// Use a [`SyntaxSetBuilder`] to load syntax definitions and build a syntax set.
27 ///
28 /// After building, the syntax set is immutable and can no longer be modified, but you can convert
29 /// it back into a builder by using the [`into_builder`] method.
30 ///
31 /// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
32 /// [`into_builder`]: #method.into_builder
33 #[derive(Debug, Serialize, Deserialize)]
34 pub struct SyntaxSet {
35     syntaxes: Vec<SyntaxReference>,
36     contexts: Vec<Context>,
37     /// Stores the syntax index for every path that was loaded
38     path_syntaxes: Vec<(String, usize)>,
39 
40     #[serde(skip_serializing, skip_deserializing, default = "AtomicLazyCell::new")]
41     first_line_cache: AtomicLazyCell<FirstLineCache>,
42     /// Metadata, e.g. indent and commenting information.
43     ///
44     /// NOTE: if serializing, you should handle metadata manually; that is, you should serialize and
45     /// deserialize it separately. See `examples/gendata.rs` for an example.
46     #[cfg(feature = "metadata")]
47     #[serde(skip, default)]
48     pub(crate) metadata: Metadata,
49 }
50 
51 /// A linked version of a [`SyntaxDefinition`] that is only useful as part of the
52 /// [`SyntaxSet`] that contains it. See docs for [`SyntaxSetBuilder::build`] for
53 /// more info.
54 #[derive(Clone, Debug, Serialize, Deserialize)]
55 pub struct SyntaxReference {
56     pub name: String,
57     pub file_extensions: Vec<String>,
58     pub scope: Scope,
59     pub first_line_match: Option<String>,
60     pub hidden: bool,
61     #[serde(serialize_with = "ordered_map")]
62     pub variables: HashMap<String, String>,
63     #[serde(serialize_with = "ordered_map")]
64     pub(crate) contexts: HashMap<String, ContextId>,
65 }
66 
67 /// A syntax set builder is used for loading syntax definitions from the file
68 /// system or by adding [`SyntaxDefinition`] objects.
69 ///
70 /// Once all the syntaxes have been added, call [`build`] to turn the builder into
71 /// a [`SyntaxSet`] that can be used for parsing or highlighting.
72 ///
73 /// [`SyntaxDefinition`]: syntax_definition/struct.SyntaxDefinition.html
74 /// [`build`]: #method.build
75 /// [`SyntaxSet`]: struct.SyntaxSet.html
76 #[derive(Clone, Default)]
77 pub struct SyntaxSetBuilder {
78     syntaxes: Vec<SyntaxDefinition>,
79     path_syntaxes: Vec<(String, usize)>,
80     #[cfg(feature = "metadata")]
81     raw_metadata: LoadMetadata,
82 
83     /// If this `SyntaxSetBuilder` is created with `SyntaxSet::into_builder`
84     /// from a `SyntaxSet` that already had metadata, we keep that metadata,
85     /// merging it with newly loaded metadata.
86     #[cfg(feature = "metadata")]
87     existing_metadata: Option<Metadata>,
88 }
89 
/// Reads the file at `p` and parses its contents as a syntax definition.
///
/// `lines_include_newline` is passed straight through to
/// `SyntaxDefinition::load_from_str`. The file stem of `p` is passed along as
/// well if it is valid UTF-8 (presumably it serves as a fallback name; confirm
/// in `load_from_str`).
///
/// # Errors
///
/// I/O errors from opening or reading the file are converted into a
/// `LoadingError`. Parse errors are returned as `LoadingError::ParseSyntax`
/// together with the displayed path of the file.
#[cfg(feature = "yaml-load")]
fn load_syntax_file(
    p: &Path,
    lines_include_newline: bool,
) -> Result<SyntaxDefinition, LoadingError> {
    let mut contents = String::new();
    File::open(p)?.read_to_string(&mut contents)?;

    let file_stem = p.file_stem().and_then(|stem| stem.to_str());
    SyntaxDefinition::load_from_str(&contents, lines_include_newline, file_stem)
        .map_err(|err| LoadingError::ParseSyntax(err, Some(p.display().to_string())))
}
106 
107 impl Clone for SyntaxSet {
clone(&self) -> SyntaxSet108     fn clone(&self) -> SyntaxSet {
109         SyntaxSet {
110             syntaxes: self.syntaxes.clone(),
111             contexts: self.contexts.clone(),
112             path_syntaxes: self.path_syntaxes.clone(),
113             // Will need to be re-initialized
114             first_line_cache: AtomicLazyCell::new(),
115             #[cfg(feature = "metadata")]
116             metadata: self.metadata.clone(),
117         }
118     }
119 }
120 
121 impl Default for SyntaxSet {
default() -> Self122     fn default() -> Self {
123         SyntaxSet {
124             syntaxes: Vec::new(),
125             contexts: Vec::new(),
126             path_syntaxes: Vec::new(),
127             first_line_cache: AtomicLazyCell::new(),
128             #[cfg(feature = "metadata")]
129             metadata: Metadata::default(),
130         }
131     }
132 }
133 
134 
135 impl SyntaxSet {
new() -> SyntaxSet136     pub fn new() -> SyntaxSet {
137         SyntaxSet::default()
138     }
139 
140     /// Convenience constructor for creating a builder, then loading syntax
141     /// definitions from a folder and then building the syntax set.
142     ///
143     /// Note that this uses `lines_include_newline` set to `false`, see the
144     /// [`add_from_folder`] method docs on [`SyntaxSetBuilder`] for an explanation
145     /// as to why this might not be the best.
146     ///
147     /// [`add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder
148     /// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
149     #[cfg(feature = "yaml-load")]
load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError>150     pub fn load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError> {
151         let mut builder = SyntaxSetBuilder::new();
152         builder.add_from_folder(folder, false)?;
153         Ok(builder.build())
154     }
155 
156     /// The list of syntaxes in the set
syntaxes(&self) -> &[SyntaxReference]157     pub fn syntaxes(&self) -> &[SyntaxReference] {
158         &self.syntaxes[..]
159     }
160 
161     #[cfg(feature = "metadata")]
set_metadata(&mut self, metadata: Metadata)162     pub fn set_metadata(&mut self, metadata: Metadata) {
163         self.metadata = metadata;
164     }
165 
166     /// The loaded metadata for this set.
167     #[cfg(feature = "metadata")]
metadata(&self) -> &Metadata168     pub fn metadata(&self) -> &Metadata {
169         &self.metadata
170     }
171 
172     /// Finds a syntax by its default scope, for example `source.regexp` finds the regex syntax.
173     ///
174     /// This and all similar methods below do a linear search of syntaxes, this should be fast
175     /// because there aren't many syntaxes, but don't think you can call it a bajillion times per
176     /// second.
find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference>177     pub fn find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference> {
178         self.syntaxes.iter().rev().find(|&s| s.scope == scope)
179     }
180 
find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference>181     pub fn find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference> {
182         self.syntaxes.iter().rev().find(|&s| name == s.name)
183     }
184 
find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference>185     pub fn find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference> {
186         self.syntaxes.iter().rev().find(|&s| s.file_extensions.iter().any(|e| e == extension))
187     }
188 
189     /// Searches for a syntax first by extension and then by case-insensitive name
190     ///
191     /// This is useful for things like Github-flavoured-markdown code block highlighting where all
192     /// you have to go on is a short token given by the user
find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>193     pub fn find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
194         {
195             let ext_res = self.find_syntax_by_extension(s);
196             if ext_res.is_some() {
197                 return ext_res;
198             }
199         }
200         self.syntaxes.iter().rev().find(|&syntax| syntax.name.eq_ignore_ascii_case(s))
201     }
202 
203     /// Try to find the syntax for a file based on its first line
204     ///
205     /// This uses regexes that come with some sublime syntax grammars for matching things like
206     /// shebangs and mode lines like `-*- Mode: C -*-`
find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>207     pub fn find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
208         let cache = self.first_line_cache();
209         for &(ref reg, i) in cache.regexes.iter().rev() {
210             if reg.search(s, 0, s.len(), None) {
211                 return Some(&self.syntaxes[i]);
212             }
213         }
214         None
215     }
216 
217     /// Searches for a syntax by it's original file path when it was first loaded from disk
218     ///
219     /// This is primarily useful for syntax tests. Some may specify a
220     /// `Packages/PackageName/SyntaxName.sublime-syntax` path, and others may just have
221     /// `SyntaxName.sublime-syntax`. This caters for these by matching the end of the path of the
222     /// loaded syntax definition files
223     // however, if a syntax name is provided without a folder, make sure we don't accidentally match the end of a different syntax definition's name - by checking a / comes before it or it is the full path
find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference>224     pub fn find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference> {
225         let mut slash_path = "/".to_string();
226         slash_path.push_str(&path);
227         self.path_syntaxes.iter().rev().find(|t| t.0.ends_with(&slash_path) || t.0 == path).map(|&(_,i)| &self.syntaxes[i])
228     }
229 
230     /// Convenience method that tries to find the syntax for a file path, first by extension/name
231     /// and then by first line of the file if that doesn't work.
232     ///
233     /// May IO Error because it sometimes tries to read the first line of the file.
234     ///
235     /// # Examples
236     ///
237     /// When determining how to highlight a file, use this in combination with a fallback to plain
238     /// text:
239     ///
240     /// ```
241     /// use syntect::parsing::SyntaxSet;
242     /// let ss = SyntaxSet::load_defaults_newlines();
243     /// let syntax = ss.find_syntax_for_file("testdata/highlight_test.erb")
244     ///     .unwrap() // for IO errors, you may want to use try!() or another plain text fallback
245     ///     .unwrap_or_else(|| ss.find_syntax_plain_text());
246     /// assert_eq!(syntax.name, "HTML (Rails)");
247     /// ```
find_syntax_for_file<P: AsRef<Path>>(&self, path_obj: P) -> io::Result<Option<&SyntaxReference>>248     pub fn find_syntax_for_file<P: AsRef<Path>>(&self,
249                                                 path_obj: P)
250                                                 -> io::Result<Option<&SyntaxReference>> {
251         let path: &Path = path_obj.as_ref();
252         let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
253         let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
254         let ext_syntax = self.find_syntax_by_extension(file_name).or_else(
255                             || self.find_syntax_by_extension(extension));
256         let line_syntax = if ext_syntax.is_none() {
257             let mut line = String::new();
258             let f = File::open(path)?;
259             let mut line_reader = BufReader::new(&f);
260             line_reader.read_line(&mut line)?;
261             self.find_syntax_by_first_line(&line)
262         } else {
263             None
264         };
265         let syntax = ext_syntax.or(line_syntax);
266         Ok(syntax)
267     }
268 
269     /// Finds a syntax for plain text, which usually has no highlighting rules.
270     ///
271     /// This is good as a fallback when you can't find another syntax but you still want to use the
272     /// same highlighting pipeline code.
273     ///
274     /// This syntax should always be present, if not this method will panic. If the way you load
275     /// syntaxes doesn't create one, use [`add_plain_text_syntax`].
276     ///
277     /// # Examples
278     /// ```
279     /// use syntect::parsing::SyntaxSetBuilder;
280     /// let mut builder = SyntaxSetBuilder::new();
281     /// builder.add_plain_text_syntax();
282     /// let ss = builder.build();
283     /// let syntax = ss.find_syntax_by_token("rs").unwrap_or_else(|| ss.find_syntax_plain_text());
284     /// assert_eq!(syntax.name, "Plain Text");
285     /// ```
286     ///
287     /// [`add_plain_text_syntax`]: struct.SyntaxSetBuilder.html#method.add_plain_text_syntax
find_syntax_plain_text(&self) -> &SyntaxReference288     pub fn find_syntax_plain_text(&self) -> &SyntaxReference {
289         self.find_syntax_by_name("Plain Text")
290             .expect("All syntax sets ought to have a plain text syntax")
291     }
292 
293     /// Converts this syntax set into a builder so that more syntaxes can be
294     /// added to it.
295     ///
296     /// Note that newly added syntaxes can have references to existing syntaxes
297     /// in the set, but not the other way around.
into_builder(self) -> SyntaxSetBuilder298     pub fn into_builder(self) -> SyntaxSetBuilder {
299         #[cfg(feature = "metadata")]
300         let SyntaxSet { syntaxes, contexts, path_syntaxes, metadata, .. } = self;
301         #[cfg(not(feature = "metadata"))]
302         let SyntaxSet { syntaxes, contexts, path_syntaxes, .. } = self;
303 
304         let mut context_map = HashMap::with_capacity(contexts.len());
305         for (i, context) in contexts.into_iter().enumerate() {
306             context_map.insert(i, context);
307         }
308 
309         let mut builder_syntaxes = Vec::with_capacity(syntaxes.len());
310 
311         for syntax in syntaxes {
312             let SyntaxReference {
313                 name,
314                 file_extensions,
315                 scope,
316                 first_line_match,
317                 hidden,
318                 variables,
319                 contexts,
320             } = syntax;
321 
322             let mut builder_contexts = HashMap::with_capacity(contexts.len());
323             for (name, context_id) in contexts {
324                 if let Some(context) = context_map.remove(&context_id.index()) {
325                     builder_contexts.insert(name, context);
326                 }
327             }
328 
329             let syntax_definition = SyntaxDefinition {
330                 name,
331                 file_extensions,
332                 scope,
333                 first_line_match,
334                 hidden,
335                 variables,
336                 contexts: builder_contexts,
337             };
338             builder_syntaxes.push(syntax_definition);
339         }
340 
341         SyntaxSetBuilder {
342             syntaxes: builder_syntaxes,
343             path_syntaxes,
344             #[cfg(feature = "metadata")]
345             existing_metadata: Some(metadata),
346             #[cfg(feature = "metadata")]
347             raw_metadata: LoadMetadata::default(),
348         }
349     }
350 
351     #[inline(always)]
get_context(&self, context_id: &ContextId) -> &Context352     pub(crate) fn get_context(&self, context_id: &ContextId) -> &Context {
353         &self.contexts[context_id.index()]
354     }
355 
first_line_cache(&self) -> &FirstLineCache356     fn first_line_cache(&self) -> &FirstLineCache {
357         if let Some(cache) = self.first_line_cache.borrow() {
358             cache
359         } else {
360             let cache = FirstLineCache::new(self.syntaxes());
361             self.first_line_cache.fill(cache).ok();
362             self.first_line_cache.borrow().unwrap()
363         }
364     }
365 
find_unlinked_contexts(&self) -> BTreeSet<String>366     pub fn find_unlinked_contexts(&self) -> BTreeSet<String> {
367         let SyntaxSet { syntaxes, contexts, .. } = self;
368 
369         let mut context_map = HashMap::with_capacity(contexts.len());
370         for (i, context) in contexts.into_iter().enumerate() {
371             context_map.insert(i, context);
372         }
373 
374         let mut unlinked_contexts = BTreeSet::new();
375 
376         for syntax in syntaxes {
377             let SyntaxReference {
378                 name,
379                 scope,
380                 contexts,
381                 ..
382             } = syntax;
383 
384             for (_, context_id) in contexts {
385                 if let Some(context) = context_map.remove(&context_id.index()) {
386                     for pattern in context.patterns.iter() {
387                         let maybe_refs_to_check = match pattern {
388                             Pattern::Match(match_pat) => {
389                                 match &match_pat.operation {
390                                     MatchOperation::Push(context_refs) => {
391                                         Some(context_refs)
392                                     },
393                                     MatchOperation::Set(context_refs) => {
394                                         Some(context_refs)
395                                     },
396                                     _ => None,
397                                 }
398                             },
399                             _ => None,
400                         };
401 
402                         for context_ref in maybe_refs_to_check.into_iter().flatten() {
403                             match context_ref {
404                                 ContextReference::Direct(_) => {},
405                                 _ => {
406                                     unlinked_contexts.insert(
407                                         format!(
408                                             "Syntax '{}' with scope '{}' has unresolved context reference {:?}",
409                                             name, scope, &context_ref
410                                         )
411                                     );
412                                 },
413                             }
414                         }
415                     }
416                 }
417             }
418         }
419         unlinked_contexts
420     }
421 }
422 
423 
424 impl SyntaxSetBuilder {
new() -> SyntaxSetBuilder425     pub fn new() -> SyntaxSetBuilder {
426         SyntaxSetBuilder::default()
427     }
428 
429     /// Add a syntax to the set.
add(&mut self, syntax: SyntaxDefinition)430     pub fn add(&mut self, syntax: SyntaxDefinition) {
431         self.syntaxes.push(syntax);
432     }
433 
434     /// The list of syntaxes added so far.
syntaxes(&self) -> &[SyntaxDefinition]435     pub fn syntaxes(&self) -> &[SyntaxDefinition] {
436         &self.syntaxes[..]
437     }
438 
439     /// A rarely useful method that loads in a syntax with no highlighting rules for plain text
440     ///
441     /// Exists mainly for adding the plain text syntax to syntax set dumps, because for some reason
442     /// the default Sublime plain text syntax is still in `.tmLanguage` format.
443     #[cfg(feature = "yaml-load")]
add_plain_text_syntax(&mut self)444     pub fn add_plain_text_syntax(&mut self) {
445         let s = "---\nname: Plain Text\nfile_extensions: [txt]\nscope: text.plain\ncontexts: \
446                  {main: []}";
447         let syn = SyntaxDefinition::load_from_str(s, false, None).unwrap();
448         self.syntaxes.push(syn);
449     }
450 
451     /// Loads all the `.sublime-syntax` files in a folder into this builder.
452     ///
453     /// The `lines_include_newline` parameter is used to work around the fact that Sublime Text
454     /// normally passes line strings including newline characters (`\n`) to its regex engine. This
455     /// results in many syntaxes having regexes matching `\n`, which doesn't work if you don't pass
456     /// in newlines. It is recommended that if you can you pass in lines with newlines if you can
457     /// and pass `true` for this parameter. If that is inconvenient pass `false` and the loader
458     /// will do some hacky find and replaces on the match regexes that seem to work for the default
459     /// syntax set, but may not work for any other syntaxes.
460     ///
461     /// In the future I might include a "slow mode" that copies the lines passed in and appends a
462     /// newline if there isn't one, but in the interest of performance currently this hacky fix will
463     /// have to do.
464     #[cfg(feature = "yaml-load")]
add_from_folder<P: AsRef<Path>>( &mut self, folder: P, lines_include_newline: bool ) -> Result<(), LoadingError>465     pub fn add_from_folder<P: AsRef<Path>>(
466         &mut self,
467         folder: P,
468         lines_include_newline: bool
469     ) -> Result<(), LoadingError> {
470         for entry in WalkDir::new(folder).sort_by(|a, b| a.file_name().cmp(b.file_name())) {
471             let entry = entry.map_err(LoadingError::WalkDir)?;
472             if entry.path().extension().map_or(false, |e| e == "sublime-syntax") {
473                 let syntax = load_syntax_file(entry.path(), lines_include_newline)?;
474                 if let Some(path_str) = entry.path().to_str() {
475                     // Split the path up and rejoin with slashes so that syntaxes loaded on Windows
476                     // can still be loaded the same way.
477                     let path = Path::new(path_str);
478                     let path_parts: Vec<_> = path.iter().map(|c| c.to_str().unwrap()).collect();
479                     self.path_syntaxes.push((path_parts.join("/").to_string(), self.syntaxes.len()));
480                 }
481                 self.syntaxes.push(syntax);
482             }
483 
484             #[cfg(feature = "metadata")]
485             {
486                 if entry.path().extension() == Some("tmPreferences".as_ref()) {
487                     match RawMetadataEntry::load(entry.path()) {
488                         Ok(meta) => self.raw_metadata.add_raw(meta),
489                         Err(_err) => (),
490                     }
491                 }
492             }
493         }
494 
495         Ok(())
496     }
497 
498     /// Build a [`SyntaxSet`] from the syntaxes that have been added to this
499     /// builder.
500     ///
501     /// ### Linking
502     ///
503     /// The contexts in syntaxes can reference other contexts in the same syntax
504     /// or even other syntaxes. For example, a HTML syntax can reference a CSS
505     /// syntax so that CSS blocks in HTML work as expected.
506     ///
507     /// Those references work in various ways and involve one or two lookups.
508     /// To avoid having to do these lookups during parsing/highlighting, the
509     /// references are changed to directly reference contexts via index. That's
510     /// called linking.
511     ///
512     /// Linking is done in this build step. So in order to get the best
513     /// performance, you should try to avoid calling this too much. Ideally,
514     /// create a [`SyntaxSet`] once and then use it many times. If you can,
515     /// serialize a [`SyntaxSet`] for your program and when you run the program,
516     /// directly load the [`SyntaxSet`].
517     ///
518     /// [`SyntaxSet`]: struct.SyntaxSet.html
build(self) -> SyntaxSet519     pub fn build(self) -> SyntaxSet {
520 
521         #[cfg(not(feature = "metadata"))]
522         let SyntaxSetBuilder { syntaxes: syntax_definitions, path_syntaxes } = self;
523         #[cfg(feature = "metadata")]
524         let SyntaxSetBuilder {
525             syntaxes: syntax_definitions,
526             path_syntaxes,
527             raw_metadata,
528             existing_metadata,
529         } = self;
530 
531         let mut syntaxes = Vec::with_capacity(syntax_definitions.len());
532         let mut all_contexts = Vec::new();
533 
534         for syntax_definition in syntax_definitions {
535             let SyntaxDefinition {
536                 name,
537                 file_extensions,
538                 scope,
539                 first_line_match,
540                 hidden,
541                 variables,
542                 contexts,
543             } = syntax_definition;
544 
545             let mut map = HashMap::new();
546 
547             let mut contexts: Vec<(String, Context)> = contexts.into_iter().collect();
548             // Sort the values of the HashMap so that the contexts in the
549             // resulting SyntaxSet have a deterministic order for serializing.
550             // Because we're sorting by the keys which are unique, we can use
551             // an unstable sort.
552             contexts.sort_unstable_by(|(name_a, _), (name_b, _)| name_a.cmp(&name_b));
553             for (name, context) in contexts {
554                 let index = all_contexts.len();
555                 map.insert(name, ContextId::new(index));
556                 all_contexts.push(context);
557             }
558 
559             let syntax = SyntaxReference {
560                 name,
561                 file_extensions,
562                 scope,
563                 first_line_match,
564                 hidden,
565                 variables,
566                 contexts: map,
567             };
568             syntaxes.push(syntax);
569         }
570 
571         let mut found_more_backref_includes = true;
572         for syntax in &syntaxes {
573             let mut no_prototype = HashSet::new();
574             let prototype = syntax.contexts.get("prototype");
575             if let Some(prototype_id) = prototype {
576                 // TODO: We could do this after parsing YAML, instead of here?
577                 Self::recursively_mark_no_prototype(syntax, prototype_id.index(), &all_contexts, &mut no_prototype);
578             }
579 
580             for context_id in syntax.contexts.values() {
581                 let index = context_id.index();
582                 let mut context = &mut all_contexts[index];
583                 if let Some(prototype_id) = prototype {
584                     if context.meta_include_prototype && !no_prototype.contains(&index) {
585                         context.prototype = Some(*prototype_id);
586                     }
587                 }
588                 Self::link_context(&mut context, syntax, &syntaxes);
589 
590                 if context.uses_backrefs {
591                     found_more_backref_includes = true;
592                 }
593             }
594         }
595 
596         // We need to recursively mark contexts that include contexts which
597         // use backreferences as using backreferences. In theory we could use
598         // a more efficient method here like doing a toposort or constructing
599         // a representation with reversed edges and then tracing in the
600         // opposite direction, but I benchmarked this and it adds <2% to link
601         // time on the default syntax set, and linking doesn't even happen
602         // when loading from a binary dump.
603         while found_more_backref_includes {
604             found_more_backref_includes = false;
605             // find any contexts which include a context which uses backrefs
606             // and mark those as using backrefs - to support nested includes
607             for context_index in 0..all_contexts.len() {
608                 let context = &all_contexts[context_index];
609                 if !context.uses_backrefs && context.patterns.iter().any(|pattern| {
610                     match pattern {
611                         Pattern::Include(ContextReference::Direct(id))
612                             if all_contexts[id.index()].uses_backrefs => true,
613                         _ => false,
614                     }
615                 }) {
616                     let mut context = &mut all_contexts[context_index];
617                     context.uses_backrefs = true;
618                     // look for contexts including this context
619                     found_more_backref_includes = true;
620                 }
621             }
622         }
623 
624         #[cfg(feature = "metadata")]
625         let metadata = match existing_metadata {
626             Some(existing) => existing.merged_with_raw(raw_metadata),
627             None => raw_metadata.into(),
628         };
629 
630         SyntaxSet {
631             syntaxes,
632             contexts: all_contexts,
633             path_syntaxes,
634             first_line_cache: AtomicLazyCell::new(),
635             #[cfg(feature = "metadata")]
636             metadata,
637         }
638     }
639 
640     /// Anything recursively included by the prototype shouldn't include the prototype.
641     /// This marks them as such.
recursively_mark_no_prototype( syntax: &SyntaxReference, context_id: usize, contexts: &[Context], no_prototype: &mut HashSet<usize>, )642     fn recursively_mark_no_prototype(
643         syntax: &SyntaxReference,
644         context_id: usize,
645         contexts: &[Context],
646         no_prototype: &mut HashSet<usize>,
647     ) {
648         let first_time = no_prototype.insert(context_id);
649         if !first_time {
650             return;
651         }
652 
653         for pattern in &contexts[context_id].patterns {
654             match *pattern {
655                 // Apparently inline blocks also don't include the prototype when within the prototype.
656                 // This is really weird, but necessary to run the YAML syntax.
657                 Pattern::Match(ref match_pat) => {
658                     let maybe_context_refs = match match_pat.operation {
659                         MatchOperation::Push(ref context_refs) |
660                         MatchOperation::Set(ref context_refs) => Some(context_refs),
661                         MatchOperation::Pop | MatchOperation::None => None,
662                     };
663                     if let Some(context_refs) = maybe_context_refs {
664                         for context_ref in context_refs.iter() {
665                             match context_ref {
666                                 ContextReference::Inline(ref s) | ContextReference::Named(ref s) => {
667                                     if let Some(i) = syntax.contexts.get(s) {
668                                         Self::recursively_mark_no_prototype(syntax, i.index(), contexts, no_prototype);
669                                     }
670                                 },
671                                 ContextReference::Direct(ref id) => {
672                                     Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
673                                 },
674                                 _ => (),
675                             }
676                         }
677                     }
678                 }
679                 Pattern::Include(ref reference) => {
680                     match reference {
681                         ContextReference::Named(ref s) => {
682                             if let Some(id) = syntax.contexts.get(s) {
683                                 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
684                             }
685                         },
686                         ContextReference::Direct(ref id) => {
687                             Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
688                         },
689                         _ => (),
690                     }
691                 }
692             }
693         }
694     }
695 
link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])696     fn link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
697         for pattern in &mut context.patterns {
698             match *pattern {
699                 Pattern::Match(ref mut match_pat) => Self::link_match_pat(match_pat, syntax, syntaxes),
700                 Pattern::Include(ref mut context_ref) => Self::link_ref(context_ref, syntax, syntaxes),
701             }
702         }
703     }
704 
link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])705     fn link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
706         // println!("{:?}", context_ref);
707         use super::syntax_definition::ContextReference::*;
708         let linked_context_id = match *context_ref {
709             Named(ref s) | Inline(ref s) => {
710                 // This isn't actually correct, but it is better than nothing/crashing.
711                 // This is being phased out anyhow, see https://github.com/sublimehq/Packages/issues/73
712                 // Fixes issue #30
713                 if s == "$top_level_main" {
714                     syntax.contexts.get("main")
715                 } else {
716                     syntax.contexts.get(s)
717                 }
718             }
719             ByScope { scope, ref sub_context } => {
720                 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
721                 syntaxes
722                     .iter()
723                     .rev()
724                     .find(|s| s.scope == scope)
725                     .and_then(|s| s.contexts.get(context_name))
726             }
727             File { ref name, ref sub_context } => {
728                 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
729                 syntaxes
730                     .iter()
731                     .rev()
732                     .find(|s| &s.name == name)
733                     .and_then(|s| s.contexts.get(context_name))
734             }
735             Direct(_) => None,
736         };
737         if let Some(context_id) = linked_context_id {
738             let mut new_ref = Direct(*context_id);
739             mem::swap(context_ref, &mut new_ref);
740         }
741     }
742 
link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])743     fn link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
744         let maybe_context_refs = match match_pat.operation {
745             MatchOperation::Push(ref mut context_refs) |
746             MatchOperation::Set(ref mut context_refs) => Some(context_refs),
747             MatchOperation::Pop | MatchOperation::None => None,
748         };
749         if let Some(context_refs) = maybe_context_refs {
750             for context_ref in context_refs.iter_mut() {
751                 Self::link_ref(context_ref, syntax, syntaxes);
752             }
753         }
754         if let Some(ref mut context_ref) = match_pat.with_prototype {
755             Self::link_ref(context_ref, syntax, syntaxes);
756         }
757     }
758 }
759 
760 #[derive(Debug)]
761 struct FirstLineCache {
762     /// (first line regex, syntax index) pairs for all syntaxes with a first line regex
763     regexes: Vec<(Regex, usize)>,
764 }
765 
766 impl FirstLineCache {
new(syntaxes: &[SyntaxReference]) -> FirstLineCache767     fn new(syntaxes: &[SyntaxReference]) -> FirstLineCache {
768         let mut regexes = Vec::new();
769         for (i, syntax) in syntaxes.iter().enumerate() {
770             if let Some(ref reg_str) = syntax.first_line_match {
771                 let reg = Regex::new(reg_str.into());
772                 regexes.push((reg, i));
773             }
774         }
775         FirstLineCache {
776             regexes,
777         }
778     }
779 }
780 
781 
#[cfg(feature = "yaml-load")]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsing::{ParseState, Scope, syntax_definition};
    use std::collections::HashMap;

    /// Loads the packages from `testdata/Packages` plus a hand-written CMake stub and exercises
    /// each of the `find_syntax_*` lookups.
    #[test]
    fn can_load() {
        let mut builder = SyntaxSetBuilder::new();
        builder.add_from_folder("testdata/Packages", false).unwrap();

        // A stub without any contexts: it only has to be found by file name and extension.
        builder.add(SyntaxDefinition {
            name: "CMake".to_string(),
            file_extensions: vec!["CMakeLists.txt".to_string(), "cmake".to_string()],
            scope: Scope::new("source.cmake").unwrap(),
            first_line_match: None,
            hidden: false,
            variables: HashMap::new(),
            contexts: HashMap::new(),
        });
        builder.add_plain_text_syntax();

        let ps = builder.build();

        // Lookups by first line.
        let first_line_cases = [
            ("#!/usr/bin/env node", "JavaScript"),
            ("lol -*- Mode: C -*- such line", "C"),
        ];
        for &(line, expected) in first_line_cases.iter() {
            assert_eq!(ps.find_syntax_by_first_line(line).unwrap().name, expected);
        }
        assert!(ps.find_syntax_by_first_line("derp derp hi lol").is_none());

        // Called only for its own sake; the result is not inspected.
        ps.find_syntax_plain_text();

        // Lookups by extension, token and path.
        assert_eq!(ps.find_syntax_by_extension("rake").unwrap().name, "Ruby");
        assert_eq!(ps.find_syntax_by_token("ruby").unwrap().name, "Ruby");
        assert_eq!(ps.find_syntax_by_path("Packages/Rust/Rust.sublime-syntax").unwrap().name,
                   "Rust");

        // Lookups by file.
        let file_cases = [
            ("testdata/parser.rs", "Rust"),
            (".bashrc", "Bourne Again Shell (bash)"),
            ("CMakeLists.txt", "CMake"),
            ("test.cmake", "CMake"),
            ("Rakefile", "Ruby"),
        ];
        for &(path, expected) in file_cases.iter() {
            assert_eq!(ps.find_syntax_for_file(path).unwrap().unwrap().name, expected);
        }
        let from_first_line_file = ps
            .find_syntax_for_file("testdata/test_first_line.test")
            .expect("Error finding syntax for file")
            .expect("No syntax found for file");
        assert_eq!(from_first_line_file.name, "Ruby");

        // Lookup by name, followed by a look at the linked contexts of the result.
        let rails = ps.find_syntax_by_name("Ruby on Rails").unwrap();
        assert_eq!(rails.scope, Scope::new("source.ruby.rails").unwrap());

        let main_context = ps.get_context(&rails.contexts["main"]);
        let pattern_count = syntax_definition::context_iter(&ps, main_context).count();
        assert_eq!(pattern_count, 109);
    }

    /// A clone must stay usable after the set it was cloned from has been dropped.
    #[test]
    fn can_clone() {
        let cloned_syntax_set = {
            let original = build_set(vec![syntax_a(), syntax_b()]);
            original.clone()
            // Note: `original` is dropped at the end of this block
        };

        let ops = parse_line_as("a", "a go_b b", &cloned_syntax_set);
        assert_ops_contain(&ops, &(7, push_op("b")));
    }

    /// The builder lists the syntaxes added so far, in insertion order.
    #[test]
    fn can_list_added_syntaxes() {
        let mut builder = SyntaxSetBuilder::new();
        builder.add(syntax_a());
        builder.add(syntax_b());

        let names: Vec<&str> = builder
            .syntaxes()
            .iter()
            .map(|syntax| syntax.name.as_str())
            .collect();
        assert_eq!(names, vec!["A", "B"]);
    }

    /// A built set can be turned back into a builder, extended and built again.
    #[test]
    fn can_add_more_syntaxes_with_builder() {
        let mut builder = build_set(vec![syntax_a(), syntax_b()]).into_builder();

        let yaml_c = r#"
        name: C
        scope: source.c
        file_extensions: [c]
        contexts:
          main:
            - match: 'c'
              scope: c
            - match: 'go_a'
              push: scope:source.a#main
        "#;
        builder.add(SyntaxDefinition::load_from_str(yaml_c, true, None).unwrap());

        let syntax_set = builder.build();

        // C pushes into A, which in turn pushes into B.
        let ops = parse_line_as("c", "c go_a a go_b b", &syntax_set);
        assert_ops_contain(&ops, &(14, push_op("b")));
    }

    /// `find_unlinked_contexts` reports nothing for a complete set and names the dangling
    /// reference when syntax B is missing.
    #[test]
    fn can_find_unlinked_contexts() {
        let complete = build_set(vec![syntax_a(), syntax_b()]);
        assert_eq!(complete.find_unlinked_contexts().len(), 0);

        let without_b = build_set(vec![syntax_a()]);
        let unlinked: Vec<String> = without_b.find_unlinked_contexts().into_iter().collect();
        assert_eq!(
            unlinked,
            vec!["Syntax 'A' with scope 'source.a' has unresolved context reference ByScope { scope: <source.b>, sub_context: Some(\"main\") }"]
        );
    }

    /// One shared set is used to parse several lines in parallel.
    #[test]
    fn can_use_in_multiple_threads() {
        use rayon::prelude::*;

        let syntax_set = build_set(vec![syntax_a(), syntax_b()]);

        let lines = vec![
            "a a a",
            "a go_b b",
            "go_b b",
            "go_b b  b",
        ];

        let results: Vec<Vec<(usize, ScopeStackOp)>> = lines
            .par_iter()
            .map(|line| parse_line_as("a", line, &syntax_set))
            .collect();

        // One expected `(offset, pushed scope)` per input line, in the same order as `lines`.
        let expected: [(usize, &str); 4] = [(4, "a"), (7, "b"), (5, "b"), (8, "b")];
        assert_eq!(results.len(), expected.len());
        for (ops, &(offset, scope)) in results.iter().zip(expected.iter()) {
            assert_ops_contain(ops, &(offset, push_op(scope)));
        }
    }

    /// Only compiles if `SyntaxSet` is `Sync`.
    #[test]
    fn is_sync() {
        check_sync::<SyntaxSet>();
    }

    /// Only compiles if `SyntaxSet` is `Send`.
    #[test]
    fn is_send() {
        check_send::<SyntaxSet>();
    }

    /// A syntax added later with the same scope/extension wins the corresponding lookups, and
    /// references by scope from other syntaxes end up in the later one.
    #[test]
    fn can_override_syntaxes() {
        let syntax_set = {
            let mut builder = SyntaxSetBuilder::new();
            builder.add(syntax_a());
            builder.add(syntax_b());

            // Same scope and extension as syntax A.
            let yaml_a2 = r#"
                name: A improved
                scope: source.a
                file_extensions: [a]
                first_line_match: syntax\s+a
                contexts:
                  main:
                    - match: a
                      scope: a2
                    - match: go_b
                      push: scope:source.b#main
                "#;
            builder.add(SyntaxDefinition::load_from_str(yaml_a2, true, None).unwrap());

            // Its first line regex also matches everything "A improved" matches.
            let yaml_c = r#"
                name: C
                scope: source.c
                file_extensions: [c]
                first_line_match: syntax\s+.*
                contexts:
                  main:
                    - match: c
                      scope: c
                    - match: go_a
                      push: scope:source.a#main
                "#;
            builder.add(SyntaxDefinition::load_from_str(yaml_c, true, None).unwrap());

            builder.build()
        };

        let by_extension = syntax_set.find_syntax_by_extension("a").unwrap();
        assert_eq!(by_extension.name, "A improved");

        let by_scope = syntax_set.find_syntax_by_scope(Scope::new("source.a").unwrap()).unwrap();
        assert_eq!(by_scope.name, "A improved");

        let by_first_line = syntax_set.find_syntax_by_first_line("syntax a").unwrap();
        assert_eq!(by_first_line.name, "C");

        let mut parse_state = ParseState::new(by_first_line);
        let ops = parse_state.parse_line("c go_a a", &syntax_set);
        assert_ops_contain(&ops, &(7, push_op("a2")));
    }

    #[test]
    fn can_parse_issue219() {
        // Go to builder and back after loading so that build() gets Direct references instead of
        // Named ones. The bug was that Direct references were not handled when marking as
        // "no prototype", so prototype contexts accidentally had the prototype set, which made
        // the parser loop forever.
        let syntax_set = SyntaxSet::load_defaults_newlines().into_builder().build();

        let ops = parse_line_as("yaml", "# test\n", &syntax_set);
        assert_ops_contain(&ops, &(0, push_op("comment.line.number-sign.yaml")));
    }

    #[test]
    fn no_prototype_for_contexts_included_from_prototype() {
        let yaml = r#"
                name: Test Prototype
                scope: source.test
                file_extensions: [test]
                contexts:
                  prototype:
                    - include: included_from_prototype
                  main:
                    - match: main
                    - match: other
                      push: other
                  other:
                    - match: o
                  included_from_prototype:
                    - match: p
                      scope: p
                "#;
        let ss = build_set(vec![SyntaxDefinition::load_from_str(yaml, true, None).unwrap()]);

        // "main" and "other" should have the prototype set, "prototype" and
        // "included_from_prototype" must not have a prototype set.
        let with_prototype = ["main", "other"];
        assert_prototype_only_on(&with_prototype, &ss, &ss.syntaxes()[0]);

        // Building again should have the same result. The difference is that after the first
        // build(), the references have been replaced with Direct references, so the code needs to
        // handle that correctly.
        let rebuilt = ss.into_builder().build();
        assert_prototype_only_on(&with_prototype, &rebuilt, &rebuilt.syntaxes()[0]);
    }

    #[test]
    fn no_prototype_for_contexts_inline_in_prototype() {
        let yaml = r#"
                name: Test Prototype
                scope: source.test
                file_extensions: [test]
                contexts:
                  prototype:
                    - match: p
                      push:
                        - match: p2
                  main:
                    - match: main
                "#;
        let ss = build_set(vec![SyntaxDefinition::load_from_str(yaml, true, None).unwrap()]);

        // Only "main" gets the prototype; the context pushed inline from "prototype" does not.
        let with_prototype = ["main"];
        assert_prototype_only_on(&with_prototype, &ss, &ss.syntaxes()[0]);

        // Same expectation once the references have become Direct ones.
        let rebuilt = ss.into_builder().build();
        assert_prototype_only_on(&with_prototype, &rebuilt, &rebuilt.syntaxes()[0]);
    }

    /// Adds `definitions` to a fresh builder, in order, and builds the set.
    fn build_set(definitions: Vec<SyntaxDefinition>) -> SyntaxSet {
        let mut builder = SyntaxSetBuilder::new();
        for definition in definitions {
            builder.add(definition);
        }
        builder.build()
    }

    /// Parses `line` with a fresh parse state for the syntax registered for `extension`.
    fn parse_line_as(extension: &str, line: &str, syntax_set: &SyntaxSet) -> Vec<(usize, ScopeStackOp)> {
        let syntax = syntax_set.find_syntax_by_extension(extension).unwrap();
        let mut parse_state = ParseState::new(syntax);
        parse_state.parse_line(line, syntax_set)
    }

    /// Shorthand for the operation that pushes the scope called `scope`.
    fn push_op(scope: &str) -> ScopeStackOp {
        ScopeStackOp::Push(Scope::new(scope).unwrap())
    }

    /// Fails with both values printed unless `expected` is one of `ops`.
    fn assert_ops_contain(
        ops: &[(usize, ScopeStackOp)],
        expected: &(usize, ScopeStackOp)
    ) {
        let found = ops.iter().any(|op| op == expected);
        assert!(found,
                "expected operations to contain {:?}: {:?}", expected, ops);
    }

    /// Checks that, of all contexts of `syntax`, exactly those named in `expected` have a
    /// prototype. The special contexts `__main` and `__start` are not checked.
    fn assert_prototype_only_on(expected: &[&str], syntax_set: &SyntaxSet, syntax: &SyntaxReference) {
        let checked = syntax
            .contexts
            .iter()
            .filter(|(name, _)| name.as_str() != "__main" && name.as_str() != "__start");

        for (name, id) in checked {
            let has_prototype = syntax_set.get_context(id).prototype.is_some();
            if expected.contains(&name.as_str()) {
                assert!(has_prototype, "Expected context {} to have prototype", name);
            } else {
                assert!(!has_prototype, "Expected context {} to not have prototype", name);
            }
        }
    }

    /// Accepts only `Send` types; calling it is the whole check.
    fn check_send<T: Send>() {}

    /// Accepts only `Sync` types; calling it is the whole check.
    fn check_sync<T: Sync>() {}

    /// Syntax "A" (`source.a`): scopes `a` and pushes syntax B's `main` on `go_b`.
    fn syntax_a() -> SyntaxDefinition {
        let yaml = r#"
            name: A
            scope: source.a
            file_extensions: [a]
            contexts:
              main:
                - match: 'a'
                  scope: a
                - match: 'go_b'
                  push: scope:source.b#main
            "#;
        SyntaxDefinition::load_from_str(yaml, true, None).unwrap()
    }

    /// Syntax "B" (`source.b`): scopes `b` and references no other syntax.
    fn syntax_b() -> SyntaxDefinition {
        let yaml = r#"
            name: B
            scope: source.b
            file_extensions: [b]
            contexts:
              main:
                - match: 'b'
                  scope: b
            "#;
        SyntaxDefinition::load_from_str(yaml, true, None).unwrap()
    }
}
1161