1 use super::syntax_definition::*;
2 use super::scope::*;
3
4 #[cfg(feature = "metadata")]
5 use super::metadata::{LoadMetadata, Metadata, RawMetadataEntry};
6
7 #[cfg(feature = "yaml-load")]
8 use super::super::LoadingError;
9
10 use std::collections::{HashMap, HashSet};
11 use std::path::Path;
12 #[cfg(feature = "yaml-load")]
13 use walkdir::WalkDir;
14 #[cfg(feature = "yaml-load")]
15 use std::io::Read;
16 use std::io::{self, BufRead, BufReader};
17 use std::fs::File;
18 use std::mem;
19
20 use lazycell::AtomicLazyCell;
21 use onig::Regex;
22 use parsing::syntax_definition::ContextId;
23
/// A syntax set holds multiple syntaxes that have been linked together.
///
/// Use a `SyntaxSetBuilder` to load syntax definitions and build a syntax set.
///
/// After building, the syntax set is immutable and can no longer be modified.
/// But you can convert it back to a builder by using `into_builder`.
#[derive(Debug, Serialize, Deserialize)]
pub struct SyntaxSet {
    /// One entry per syntax in the set. The lookup methods search this list
    /// from the back, so a syntax added later wins over an earlier one.
    syntaxes: Vec<SyntaxReference>,
    /// The contexts of all syntaxes in one flat list; a `ContextId` is an
    /// index into it (see `get_context`).
    contexts: Vec<Context>,
    /// Stores the syntax index for every path that was loaded
    path_syntaxes: Vec<(String, usize)>,

    /// Lazily filled cache of the compiled `first_line_match` regexes.
    /// It is skipped by serde and starts out empty after deserializing
    /// or cloning.
    #[serde(skip_serializing, skip_deserializing, default = "AtomicLazyCell::new")]
    first_line_cache: AtomicLazyCell<FirstLineCache>,
    /// Metadata, e.g. indent and commenting information.
    /// NOTE: if serializing, you should handle metadata manually; that is,
    /// you should serialize and deserialize it separately. See
    /// `examples/gendata.rs` for an example.
    #[cfg(feature = "metadata")]
    #[serde(skip, default)]
    pub(crate) metadata: Metadata,
}
47
/// The linked form of one syntax as stored in a `SyntaxSet`.
///
/// It has the same descriptive fields as a `SyntaxDefinition`, but its
/// `contexts` map holds `ContextId`s that point into the owning set's context
/// list instead of holding the contexts themselves.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SyntaxReference {
    /// Name of the syntax; `SyntaxSet::find_syntax_by_name` compares against it.
    pub name: String,
    /// Strings that `SyntaxSet::find_syntax_by_extension` compares for
    /// equality. `SyntaxSet::find_syntax_for_file` also tries the whole file
    /// name against these, not only the extension.
    pub file_extensions: Vec<String>,
    /// Scope that `SyntaxSet::find_syntax_by_scope` compares against.
    pub scope: Scope,
    /// Optional regex source that is compiled and matched against the first
    /// line of a file (see `SyntaxSet::find_syntax_by_first_line`).
    pub first_line_match: Option<String>,
    /// NOTE(review): not read anywhere in this file; presumably marks syntaxes
    /// that should not be offered to users directly. Confirm against callers.
    pub hidden: bool,
    /// NOTE(review): carried over unchanged from the `SyntaxDefinition`; not
    /// interpreted in this file. Confirm its meaning with the YAML loader.
    #[serde(serialize_with = "ordered_map")]
    pub variables: HashMap<String, String>,
    /// Maps each context name of this syntax to the id of that context in the
    /// owning `SyntaxSet`.
    #[serde(serialize_with = "ordered_map")]
    pub(crate) contexts: HashMap<String, ContextId>,
}
60
/// A syntax set builder is used for loading syntax definitions from the file
/// system or by adding `SyntaxDefinition` objects.
///
/// Once all the syntaxes have been added, call `build` to turn the builder into
/// a `SyntaxSet` that can be used for parsing or highlighting.
#[derive(Clone, Default)]
pub struct SyntaxSetBuilder {
    /// The definitions added so far, in the order they were added.
    syntaxes: Vec<SyntaxDefinition>,
    /// `(slash-joined path, index into syntaxes)` pairs recorded by
    /// `add_from_folder` for every syntax file it loaded.
    path_syntaxes: Vec<(String, usize)>,
    /// Metadata entries collected from `.tmPreferences` files by
    /// `add_from_folder`.
    #[cfg(feature = "metadata")]
    raw_metadata: LoadMetadata,
    /// If this `SyntaxSetBuilder` is created with `SyntaxSet::into_builder`
    /// from a `SyntaxSet` that already had metadata, we keep that metadata,
    /// merging it with newly loaded metadata.
    #[cfg(feature = "metadata")]
    existing_metadata: Option<Metadata>,
}
78
/// Reads the file at `p` and parses its contents as a syntax definition.
///
/// `lines_include_newline` is passed straight through to
/// `SyntaxDefinition::load_from_str`, together with the file stem of `p`
/// (when it is valid UTF-8) as the third argument.
///
/// # Errors
/// I/O failures are converted into a `LoadingError` by `?`. A parse failure is
/// returned as `LoadingError::ParseSyntax` carrying the displayed path.
#[cfg(feature = "yaml-load")]
fn load_syntax_file(p: &Path,
                    lines_include_newline: bool)
                    -> Result<SyntaxDefinition, LoadingError> {
    let mut contents = String::new();
    File::open(p)?.read_to_string(&mut contents)?;

    let file_stem = p.file_stem().and_then(|stem| stem.to_str());
    SyntaxDefinition::load_from_str(&contents, lines_include_newline, file_stem)
        .map_err(|err| LoadingError::ParseSyntax(err, Some(format!("{}", p.display()))))
}
95
96 impl Clone for SyntaxSet {
clone(&self) -> SyntaxSet97 fn clone(&self) -> SyntaxSet {
98 SyntaxSet {
99 syntaxes: self.syntaxes.clone(),
100 contexts: self.contexts.clone(),
101 path_syntaxes: self.path_syntaxes.clone(),
102 // Will need to be re-initialized
103 first_line_cache: AtomicLazyCell::new(),
104 #[cfg(feature = "metadata")]
105 metadata: self.metadata.clone(),
106 }
107 }
108 }
109
110 impl Default for SyntaxSet {
default() -> Self111 fn default() -> Self {
112 SyntaxSet {
113 syntaxes: Vec::new(),
114 contexts: Vec::new(),
115 path_syntaxes: Vec::new(),
116 first_line_cache: AtomicLazyCell::new(),
117 #[cfg(feature = "metadata")]
118 metadata: Metadata::default(),
119 }
120 }
121 }
122
123
124 impl SyntaxSet {
new() -> SyntaxSet125 pub fn new() -> SyntaxSet {
126 SyntaxSet::default()
127 }
128
/// Shortcut for the common case: creates a fresh builder, loads every syntax
/// definition found in `folder` into it and builds the resulting set.
///
/// The folder is loaded with `lines_include_newline` set to `false`. See the
/// `add_from_folder` docs on `SyntaxSetBuilder` for why that may not be the
/// best choice.
///
/// # Errors
/// Returns whatever error `add_from_folder` reports.
#[cfg(feature = "yaml-load")]
pub fn load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError> {
    let mut builder = SyntaxSetBuilder::new();
    let loaded = builder.add_from_folder(folder, false);
    loaded.map(|()| builder.build())
}
141
142 /// The list of syntaxes in the set
syntaxes(&self) -> &[SyntaxReference]143 pub fn syntaxes(&self) -> &[SyntaxReference] {
144 &self.syntaxes[..]
145 }
146
/// Replaces the metadata stored in this set with `metadata`.
#[cfg(feature = "metadata")]
pub fn set_metadata(&mut self, metadata: Metadata) {
    self.metadata = metadata;
}
151
/// The loaded metadata for this set.
#[cfg(feature = "metadata")]
pub fn metadata(&self) -> &Metadata {
    &self.metadata
}
157
158 /// Finds a syntax by its default scope, for example `source.regexp` finds the regex syntax.
159 /// This and all similar methods below do a linear search of syntaxes, this should be fast
160 /// because there aren't many syntaxes, but don't think you can call it a bajillion times per second.
find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference>161 pub fn find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference> {
162 self.syntaxes.iter().rev().find(|&s| s.scope == scope)
163 }
164
find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference>165 pub fn find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference> {
166 self.syntaxes.iter().rev().find(|&s| name == s.name)
167 }
168
find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference>169 pub fn find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference> {
170 self.syntaxes.iter().rev().find(|&s| s.file_extensions.iter().any(|e| e == extension))
171 }
172
173 /// Searches for a syntax first by extension and then by case-insensitive name
174 /// useful for things like Github-flavoured-markdown code block highlighting where
175 /// all you have to go on is a short token given by the user
find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>176 pub fn find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
177 {
178 let ext_res = self.find_syntax_by_extension(s);
179 if ext_res.is_some() {
180 return ext_res;
181 }
182 }
183 self.syntaxes.iter().rev().find(|&syntax| syntax.name.eq_ignore_ascii_case(s))
184 }
185
186 /// Try to find the syntax for a file based on its first line.
187 /// This uses regexes that come with some sublime syntax grammars
188 /// for matching things like shebangs and mode lines like `-*- Mode: C -*-`
find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>189 pub fn find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
190 let cache = self.first_line_cache();
191 for &(ref reg, i) in cache.regexes.iter().rev() {
192 if reg.find(s).is_some() {
193 return Some(&self.syntaxes[i]);
194 }
195 }
196 None
197 }
198
199 /// Searches for a syntax by it's original file path when it was first loaded from disk
200 /// primarily useful for syntax tests
201 /// some may specify a Packages/PackageName/SyntaxName.sublime-syntax path
202 /// others may just have SyntaxName.sublime-syntax
203 /// this caters for these by matching the end of the path of the loaded syntax definition files
204 // however, if a syntax name is provided without a folder, make sure we don't accidentally match the end of a different syntax definition's name - by checking a / comes before it or it is the full path
find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference>205 pub fn find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference> {
206 let mut slash_path = "/".to_string();
207 slash_path.push_str(&path);
208 self.path_syntaxes.iter().rev().find(|t| t.0.ends_with(&slash_path) || t.0 == path).map(|&(_,i)| &self.syntaxes[i])
209 }
210
211 /// Convenience method that tries to find the syntax for a file path,
212 /// first by extension/name and then by first line of the file if that doesn't work.
213 /// May IO Error because it sometimes tries to read the first line of the file.
214 ///
215 /// # Examples
216 /// When determining how to highlight a file, use this in combination with a fallback to plain text:
217 ///
218 /// ```
219 /// use syntect::parsing::SyntaxSet;
220 /// let ss = SyntaxSet::load_defaults_newlines();
221 /// let syntax = ss.find_syntax_for_file("testdata/highlight_test.erb")
222 /// .unwrap() // for IO errors, you may want to use try!() or another plain text fallback
223 /// .unwrap_or_else(|| ss.find_syntax_plain_text());
224 /// assert_eq!(syntax.name, "HTML (Rails)");
225 /// ```
find_syntax_for_file<P: AsRef<Path>>(&self, path_obj: P) -> io::Result<Option<&SyntaxReference>>226 pub fn find_syntax_for_file<P: AsRef<Path>>(&self,
227 path_obj: P)
228 -> io::Result<Option<&SyntaxReference>> {
229 let path: &Path = path_obj.as_ref();
230 let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
231 let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
232 let ext_syntax = self.find_syntax_by_extension(file_name).or_else(
233 || self.find_syntax_by_extension(extension));
234 let line_syntax = if ext_syntax.is_none() {
235 let mut line = String::new();
236 let f = File::open(path)?;
237 let mut line_reader = BufReader::new(&f);
238 line_reader.read_line(&mut line)?;
239 self.find_syntax_by_first_line(&line)
240 } else {
241 None
242 };
243 let syntax = ext_syntax.or(line_syntax);
244 Ok(syntax)
245 }
246
247 /// Finds a syntax for plain text, which usually has no highlighting rules.
248 /// Good as a fallback when you can't find another syntax but you still want
249 /// to use the same highlighting pipeline code.
250 ///
251 /// This syntax should always be present, if not this method will panic.
252 /// If the way you load syntaxes doesn't create one, use `add_plain_text_syntax`.
253 ///
254 /// # Examples
255 /// ```
256 /// use syntect::parsing::SyntaxSetBuilder;
257 /// let mut builder = SyntaxSetBuilder::new();
258 /// builder.add_plain_text_syntax();
259 /// let ss = builder.build();
260 /// let syntax = ss.find_syntax_by_token("rs").unwrap_or_else(|| ss.find_syntax_plain_text());
261 /// assert_eq!(syntax.name, "Plain Text");
262 /// ```
find_syntax_plain_text(&self) -> &SyntaxReference263 pub fn find_syntax_plain_text(&self) -> &SyntaxReference {
264 self.find_syntax_by_name("Plain Text")
265 .expect("All syntax sets ought to have a plain text syntax")
266 }
267
268 /// Converts this syntax set into a builder so that more syntaxes can be
269 /// added to it.
270 ///
271 /// Note that newly added syntaxes can have references to existing syntaxes
272 /// in the set, but not the other way around.
into_builder(self) -> SyntaxSetBuilder273 pub fn into_builder(self) -> SyntaxSetBuilder {
274 #[cfg(feature = "metadata")]
275 let SyntaxSet { syntaxes, contexts, path_syntaxes, metadata, .. } = self;
276 #[cfg(not(feature = "metadata"))]
277 let SyntaxSet { syntaxes, contexts, path_syntaxes, .. } = self;
278
279 let mut context_map = HashMap::with_capacity(contexts.len());
280 for (i, context) in contexts.into_iter().enumerate() {
281 context_map.insert(i, context);
282 }
283
284 let mut builder_syntaxes = Vec::with_capacity(syntaxes.len());
285
286 for syntax in syntaxes {
287 let SyntaxReference {
288 name,
289 file_extensions,
290 scope,
291 first_line_match,
292 hidden,
293 variables,
294 contexts,
295 } = syntax;
296
297 let mut builder_contexts = HashMap::with_capacity(contexts.len());
298 for (name, context_id) in contexts {
299 if let Some(context) = context_map.remove(&context_id.index()) {
300 builder_contexts.insert(name, context);
301 }
302 }
303
304 let syntax_definition = SyntaxDefinition {
305 name,
306 file_extensions,
307 scope,
308 first_line_match,
309 hidden,
310 variables,
311 contexts: builder_contexts,
312 };
313 builder_syntaxes.push(syntax_definition);
314 }
315
316 SyntaxSetBuilder {
317 syntaxes: builder_syntaxes,
318 path_syntaxes,
319 #[cfg(feature = "metadata")]
320 existing_metadata: Some(metadata),
321 #[cfg(feature = "metadata")]
322 raw_metadata: LoadMetadata::default(),
323 }
324 }
325
/// Resolves a `ContextId` to the context it refers to in this set.
///
/// Indexes the set's context list directly, so an id whose index is out of
/// range for this set panics.
#[inline(always)]
pub(crate) fn get_context(&self, context_id: &ContextId) -> &Context {
    &self.contexts[context_id.index()]
}
330
first_line_cache(&self) -> &FirstLineCache331 fn first_line_cache(&self) -> &FirstLineCache {
332 if let Some(cache) = self.first_line_cache.borrow() {
333 cache
334 } else {
335 let cache = FirstLineCache::new(self.syntaxes());
336 self.first_line_cache.fill(cache).ok();
337 self.first_line_cache.borrow().unwrap()
338 }
339 }
340 }
341
342
343 impl SyntaxSetBuilder {
new() -> SyntaxSetBuilder344 pub fn new() -> SyntaxSetBuilder {
345 SyntaxSetBuilder::default()
346 }
347
/// Add a syntax to the set.
///
/// The definition is appended after the ones already present. No path is
/// recorded for it, so `SyntaxSet::find_syntax_by_path` will not find it.
pub fn add(&mut self, syntax: SyntaxDefinition) {
    self.syntaxes.push(syntax);
}
352
/// Adds a "Plain Text" syntax (scope `text.plain`, extension `txt`) whose
/// `main` context has no rules.
///
/// Rarely needed directly. It exists mainly so that syntax set dumps contain
/// a plain text syntax, because for some reason the default Sublime plain
/// text syntax is still in `.tmLanguage` format.
#[cfg(feature = "yaml-load")]
pub fn add_plain_text_syntax(&mut self) {
    let yaml = "---\nname: Plain Text\nfile_extensions: [txt]\nscope: text.plain\ncontexts: \
                {main: []}";
    let plain_text = SyntaxDefinition::load_from_str(yaml, false, None).unwrap();
    self.add(plain_text);
}
363
/// Loads every `.sublime-syntax` file found under `folder` into this builder.
/// Entries are visited sorted by file name.
///
/// For each loaded syntax the path it came from is recorded with `/` as the
/// separator, so that `SyntaxSet::find_syntax_by_path` works the same way for
/// syntaxes loaded on Windows. With the `metadata` feature, `.tmPreferences`
/// files are loaded as well; ones that fail to load are silently skipped.
///
/// The `lines_include_newline` parameter works around the fact that Sublime
/// Text normally passes line strings including newline characters (`\n`) to
/// its regex engine. As a result many syntaxes have regexes matching `\n`,
/// which doesn't work if you don't pass in newlines. If you can, pass in
/// lines with newlines and pass `true` for this parameter. If that is
/// inconvenient, pass `false` and the loader will do some hacky find and
/// replaces on the match regexes. Those seem to work for the default syntax
/// set, but may not work for any other syntaxes.
///
/// In the future there might be a "slow mode" that copies the lines passed in
/// and appends a newline if there isn't one, but in the interest of
/// performance this hacky fix will have to do for now.
///
/// # Errors
/// Fails on the first directory entry that cannot be walked and on the first
/// syntax file that cannot be read or parsed.
#[cfg(feature = "yaml-load")]
pub fn add_from_folder<P: AsRef<Path>>(
    &mut self,
    folder: P,
    lines_include_newline: bool
) -> Result<(), LoadingError> {
    let walker = WalkDir::new(folder).sort_by(|a, b| a.file_name().cmp(b.file_name()));
    for entry in walker {
        let entry = entry.map_err(LoadingError::WalkDir)?;
        let path = entry.path();

        let is_syntax_file = path.extension().map_or(false, |ext| ext == "sublime-syntax");
        if is_syntax_file {
            let syntax = load_syntax_file(path, lines_include_newline)?;
            if let Some(path_str) = path.to_str() {
                // Split the path up and rejoin with slashes so that syntaxes loaded on Windows
                // can still be loaded the same way.
                let slash_joined = Path::new(path_str)
                    .iter()
                    .map(|component| component.to_str().unwrap())
                    .collect::<Vec<_>>()
                    .join("/");
                self.path_syntaxes.push((slash_joined, self.syntaxes.len()));
            }
            self.syntaxes.push(syntax);
        }

        #[cfg(feature = "metadata")]
        {
            if path.extension() == Some("tmPreferences".as_ref()) {
                // Best effort: a metadata file that fails to load is ignored.
                if let Ok(meta) = RawMetadataEntry::load(path) {
                    self.raw_metadata.add_raw(meta);
                }
            }
        }
    }

    Ok(())
}
408
409 /// Build a `SyntaxSet` from the syntaxes that have been added to this
410 /// builder.
411 ///
412 /// ### Linking
413 ///
414 /// The contexts in syntaxes can reference other contexts in the same syntax
415 /// or even other syntaxes. For example, a HTML syntax can reference a CSS
416 /// syntax so that CSS blocks in HTML work as expected.
417 ///
418 /// Those references work in various ways and involve one or two lookups.
419 /// To avoid having to do these lookups during parsing/highlighting, the
420 /// references are changed to directly reference contexts via index. That's
421 /// called linking.
422 ///
423 /// Linking is done in this build step. So in order to get the best
424 /// performance, you should try to avoid calling this too much. Ideally,
425 /// create a `SyntaxSet` once and then use it many times. If you can,
426 /// serialize a `SyntaxSet` for your program and when you run the program,
427 /// directly load the `SyntaxSet`.
build(self) -> SyntaxSet428 pub fn build(self) -> SyntaxSet {
429
430 #[cfg(not(feature = "metadata"))]
431 let SyntaxSetBuilder { syntaxes: syntax_definitions, path_syntaxes } = self;
432 #[cfg(feature = "metadata")]
433 let SyntaxSetBuilder {
434 syntaxes: syntax_definitions,
435 path_syntaxes,
436 raw_metadata,
437 existing_metadata,
438 } = self;
439
440 let mut syntaxes = Vec::with_capacity(syntax_definitions.len());
441 let mut all_contexts = Vec::new();
442
443 for syntax_definition in syntax_definitions {
444 let SyntaxDefinition {
445 name,
446 file_extensions,
447 scope,
448 first_line_match,
449 hidden,
450 variables,
451 contexts,
452 } = syntax_definition;
453
454 let mut map = HashMap::new();
455
456 let mut contexts: Vec<(String, Context)> = contexts.into_iter().collect();
457 // Sort the values of the HashMap so that the contexts in the
458 // resulting SyntaxSet have a deterministic order for serializing.
459 // Because we're sorting by the keys which are unique, we can use
460 // an unstable sort.
461 contexts.sort_unstable_by(|(name_a, _), (name_b, _)| name_a.cmp(&name_b));
462 for (name, context) in contexts {
463 let index = all_contexts.len();
464 map.insert(name, ContextId::new(index));
465 all_contexts.push(context);
466 }
467
468 let syntax = SyntaxReference {
469 name,
470 file_extensions,
471 scope,
472 first_line_match,
473 hidden,
474 variables,
475 contexts: map,
476 };
477 syntaxes.push(syntax);
478 }
479
480 for syntax in &syntaxes {
481 let mut no_prototype = HashSet::new();
482 let prototype = syntax.contexts.get("prototype");
483 if let Some(prototype_id) = prototype {
484 // TODO: We could do this after parsing YAML, instead of here?
485 Self::recursively_mark_no_prototype(syntax, prototype_id.index(), &all_contexts, &mut no_prototype);
486 }
487
488 for context_id in syntax.contexts.values() {
489 let index = context_id.index();
490 let mut context = &mut all_contexts[index];
491 if let Some(prototype_id) = prototype {
492 if context.meta_include_prototype && !no_prototype.contains(&index) {
493 context.prototype = Some(*prototype_id);
494 }
495 }
496 Self::link_context(&mut context, syntax, &syntaxes);
497 }
498 }
499
500 #[cfg(feature = "metadata")]
501 let metadata = match existing_metadata {
502 Some(mut existing) => existing.merged_with_raw(raw_metadata),
503 None => raw_metadata.into(),
504 };
505
506 SyntaxSet {
507 syntaxes,
508 contexts: all_contexts,
509 path_syntaxes,
510 first_line_cache: AtomicLazyCell::new(),
511 #[cfg(feature = "metadata")]
512 metadata,
513 }
514 }
515
516 /// Anything recursively included by the prototype shouldn't include the prototype.
517 /// This marks them as such.
recursively_mark_no_prototype( syntax: &SyntaxReference, context_id: usize, contexts: &[Context], no_prototype: &mut HashSet<usize>, )518 fn recursively_mark_no_prototype(
519 syntax: &SyntaxReference,
520 context_id: usize,
521 contexts: &[Context],
522 no_prototype: &mut HashSet<usize>,
523 ) {
524 let first_time = no_prototype.insert(context_id);
525 if !first_time {
526 return;
527 }
528
529 for pattern in &contexts[context_id].patterns {
530 match *pattern {
531 // Apparently inline blocks also don't include the prototype when within the prototype.
532 // This is really weird, but necessary to run the YAML syntax.
533 Pattern::Match(ref match_pat) => {
534 let maybe_context_refs = match match_pat.operation {
535 MatchOperation::Push(ref context_refs) |
536 MatchOperation::Set(ref context_refs) => Some(context_refs),
537 MatchOperation::Pop | MatchOperation::None => None,
538 };
539 if let Some(context_refs) = maybe_context_refs {
540 for context_ref in context_refs.iter() {
541 match context_ref {
542 ContextReference::Inline(ref s) | ContextReference::Named(ref s) => {
543 if let Some(i) = syntax.contexts.get(s) {
544 Self::recursively_mark_no_prototype(syntax, i.index(), contexts, no_prototype);
545 }
546 },
547 ContextReference::Direct(ref id) => {
548 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
549 },
550 _ => (),
551 }
552 }
553 }
554 }
555 Pattern::Include(ref reference) => {
556 match reference {
557 ContextReference::Named(ref s) => {
558 if let Some(id) = syntax.contexts.get(s) {
559 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
560 }
561 },
562 ContextReference::Direct(ref id) => {
563 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
564 },
565 _ => (),
566 }
567 }
568 }
569 }
570 }
571
link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])572 fn link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
573 for pattern in &mut context.patterns {
574 match *pattern {
575 Pattern::Match(ref mut match_pat) => Self::link_match_pat(match_pat, syntax, syntaxes),
576 Pattern::Include(ref mut context_ref) => Self::link_ref(context_ref, syntax, syntaxes),
577 }
578 }
579 }
580
link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])581 fn link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
582 // println!("{:?}", context_ref);
583 use super::syntax_definition::ContextReference::*;
584 let linked_context_id = match *context_ref {
585 Named(ref s) | Inline(ref s) => {
586 // This isn't actually correct, but it is better than nothing/crashing.
587 // This is being phased out anyhow, see https://github.com/sublimehq/Packages/issues/73
588 // Fixes issue #30
589 if s == "$top_level_main" {
590 syntax.contexts.get("main")
591 } else {
592 syntax.contexts.get(s)
593 }
594 }
595 ByScope { scope, ref sub_context } => {
596 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
597 syntaxes
598 .iter()
599 .rev()
600 .find(|s| s.scope == scope)
601 .and_then(|s| s.contexts.get(context_name))
602 }
603 File { ref name, ref sub_context } => {
604 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
605 syntaxes
606 .iter()
607 .rev()
608 .find(|s| &s.name == name)
609 .and_then(|s| s.contexts.get(context_name))
610 }
611 Direct(_) => None,
612 };
613 if let Some(context_id) = linked_context_id {
614 let mut new_ref = Direct(*context_id);
615 mem::swap(context_ref, &mut new_ref);
616 }
617 }
618
link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])619 fn link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
620 let maybe_context_refs = match match_pat.operation {
621 MatchOperation::Push(ref mut context_refs) |
622 MatchOperation::Set(ref mut context_refs) => Some(context_refs),
623 MatchOperation::Pop | MatchOperation::None => None,
624 };
625 if let Some(context_refs) = maybe_context_refs {
626 for context_ref in context_refs.iter_mut() {
627 Self::link_ref(context_ref, syntax, syntaxes);
628 }
629 }
630 if let Some(ref mut context_ref) = match_pat.with_prototype {
631 Self::link_ref(context_ref, syntax, syntaxes);
632 }
633 }
634 }
635
/// Compiled `first_line_match` regexes of a syntax set, built once so that
/// `SyntaxSet::find_syntax_by_first_line` does not recompile them per call.
#[derive(Debug)]
struct FirstLineCache {
    /// (first line regex, syntax index) pairs for all syntaxes with a first line regex
    regexes: Vec<(Regex, usize)>,
}
641
642 impl FirstLineCache {
new(syntaxes: &[SyntaxReference]) -> FirstLineCache643 fn new(syntaxes: &[SyntaxReference]) -> FirstLineCache {
644 let mut regexes = Vec::new();
645 for (i, syntax) in syntaxes.iter().enumerate() {
646 if let Some(ref reg_str) = syntax.first_line_match {
647 if let Ok(reg) = Regex::new(reg_str) {
648 regexes.push((reg, i));
649 }
650 }
651 }
652 FirstLineCache {
653 regexes,
654 }
655 }
656 }
657
658
659 #[cfg(feature = "yaml-load")]
660 #[cfg(test)]
661 mod tests {
662 use super::*;
663 use parsing::{ParseState, Scope, syntax_definition};
664 use std::collections::HashMap;
665
/// Loads the packages under `testdata/Packages`, adds a dummy CMake syntax and
/// the plain text syntax, and then exercises each lookup method of the built
/// set (by first line, name, extension, token, file and path).
#[test]
fn can_load() {
    let mut builder = SyntaxSetBuilder::new();
    builder.add_from_folder("testdata/Packages", false).unwrap();

    let cmake_dummy_syntax = SyntaxDefinition {
        name: "CMake".to_string(),
        file_extensions: vec!["CMakeLists.txt".to_string(), "cmake".to_string()],
        scope: Scope::new("source.cmake").unwrap(),
        first_line_match: None,
        hidden: false,
        variables: HashMap::new(),
        contexts: HashMap::new(),
    };

    builder.add(cmake_dummy_syntax);
    builder.add_plain_text_syntax();

    let ps = builder.build();

    assert_eq!(&ps.find_syntax_by_first_line("#!/usr/bin/env node").unwrap().name,
               "JavaScript");
    let rails_scope = Scope::new("source.ruby.rails").unwrap();
    let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
    // Only checks that this does not panic.
    ps.find_syntax_plain_text();
    assert_eq!(&ps.find_syntax_by_extension("rake").unwrap().name, "Ruby");
    assert_eq!(&ps.find_syntax_by_token("ruby").unwrap().name, "Ruby");
    assert_eq!(&ps.find_syntax_by_first_line("lol -*- Mode: C -*- such line").unwrap().name,
               "C");
    assert_eq!(&ps.find_syntax_for_file("testdata/parser.rs").unwrap().unwrap().name,
               "Rust");
    assert_eq!(&ps.find_syntax_for_file("testdata/test_first_line.test")
                   .expect("Error finding syntax for file")
                   .expect("No syntax found for file")
                   .name,
               "Ruby");
    assert_eq!(&ps.find_syntax_for_file(".bashrc").unwrap().unwrap().name,
               "Bourne Again Shell (bash)");
    // The whole file name is listed as an "extension" of the dummy syntax.
    assert_eq!(&ps.find_syntax_for_file("CMakeLists.txt").unwrap().unwrap().name,
               "CMake");
    assert_eq!(&ps.find_syntax_for_file("test.cmake").unwrap().unwrap().name,
               "CMake");
    assert_eq!(&ps.find_syntax_for_file("Rakefile").unwrap().unwrap().name, "Ruby");
    assert!(&ps.find_syntax_by_first_line("derp derp hi lol").is_none());
    assert_eq!(&ps.find_syntax_by_path("Packages/Rust/Rust.sublime-syntax").unwrap().name,
               "Rust");
    // println!("{:#?}", syntax);
    assert_eq!(syntax.scope, rails_scope);
    // assert!(false);
    let main_context = ps.get_context(&syntax.contexts["main"]);
    let count = syntax_definition::context_iter(&ps, main_context).count();
    assert_eq!(count, 109);
}
719
/// A cloned set must keep working after the set it was cloned from has been
/// dropped, including following a cross-syntax reference from `a` into `b`.
#[test]
fn can_clone() {
    let cloned_syntax_set = {
        let mut builder = SyntaxSetBuilder::new();
        builder.add(syntax_a());
        builder.add(syntax_b());

        let syntax_set_original = builder.build();
        syntax_set_original.clone()
        // Note: The original syntax set is dropped
    };

    let syntax = cloned_syntax_set.find_syntax_by_extension("a").unwrap();
    let mut parse_state = ParseState::new(syntax);
    let ops = parse_state.parse_line("a go_b b", &cloned_syntax_set);
    let expected = (7, ScopeStackOp::Push(Scope::new("b").unwrap()));
    assert_ops_contain(&ops, &expected);
}
738
/// Converts a built set back into a builder, adds a syntax `C` that refers to
/// the existing syntax `a` by scope, rebuilds, and checks that parsing can
/// follow the chain of references from `c` through `a` into `b`.
#[test]
fn can_add_more_syntaxes_with_builder() {
    let syntax_set_original = {
        let mut builder = SyntaxSetBuilder::new();
        builder.add(syntax_a());
        builder.add(syntax_b());
        builder.build()
    };

    let mut builder = syntax_set_original.into_builder();

    let syntax_c = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [c]
        contexts:
          main:
            - match: 'c'
              scope: c
            - match: 'go_a'
              push: scope:source.a#main
        "#, true, None).unwrap();

    builder.add(syntax_c);

    let syntax_set = builder.build();

    let syntax = syntax_set.find_syntax_by_extension("c").unwrap();
    let mut parse_state = ParseState::new(syntax);
    let ops = parse_state.parse_line("c go_a a go_b b", &syntax_set);
    let expected = (14, ScopeStackOp::Push(Scope::new("b").unwrap()));
    assert_ops_contain(&ops, &expected);
}
772
/// Shares one syntax set between rayon worker threads, parses a different
/// line on each and checks the expected scope push for every line.
#[test]
fn can_use_in_multiple_threads() {
    use rayon::prelude::*;

    let syntax_set = {
        let mut builder = SyntaxSetBuilder::new();
        builder.add(syntax_a());
        builder.add(syntax_b());
        builder.build()
    };

    let lines = vec![
        "a a a",
        "a go_b b",
        "go_b b",
        "go_b b b",
    ];

    // Every line gets its own parse state; only the syntax set is shared.
    let results: Vec<Vec<(usize, ScopeStackOp)>> = lines
        .par_iter()
        .map(|line| {
            let syntax = syntax_set.find_syntax_by_extension("a").unwrap();
            let mut parse_state = ParseState::new(syntax);
            parse_state.parse_line(line, &syntax_set)
        })
        .collect();

    assert_ops_contain(&results[0], &(4, ScopeStackOp::Push(Scope::new("a").unwrap())));
    assert_ops_contain(&results[1], &(7, ScopeStackOp::Push(Scope::new("b").unwrap())));
    assert_ops_contain(&results[2], &(5, ScopeStackOp::Push(Scope::new("b").unwrap())));
    assert_ops_contain(&results[3], &(8, ScopeStackOp::Push(Scope::new("b").unwrap())));
}
805
/// Passes `SyntaxSet` to the `check_sync` helper.
/// NOTE(review): the helper is defined outside this view; presumably it only
/// compiles for `Sync` types. Confirm.
#[test]
fn is_sync() {
    check_sync::<SyntaxSet>();
}
810
/// Passes `SyntaxSet` to the `check_send` helper.
/// NOTE(review): the helper is defined outside this view; presumably it only
/// compiles for `Send` types. Confirm.
#[test]
fn is_send() {
    check_send::<SyntaxSet>();
}
815
/// Adds a second syntax with the same scope and extension as `a`, plus a
/// syntax `C` whose first line regex also matches, and checks that every
/// lookup and the by-scope link prefer the syntax that was added last.
#[test]
fn can_override_syntaxes() {
    let syntax_set = {
        let mut builder = SyntaxSetBuilder::new();
        builder.add(syntax_a());
        builder.add(syntax_b());

        let syntax_a2 = SyntaxDefinition::load_from_str(r#"
                name: A improved
                scope: source.a
                file_extensions: [a]
                first_line_match: syntax\s+a
                contexts:
                  main:
                    - match: a
                      scope: a2
                    - match: go_b
                      push: scope:source.b#main
                "#, true, None).unwrap();

        builder.add(syntax_a2);

        let syntax_c = SyntaxDefinition::load_from_str(r#"
                name: C
                scope: source.c
                file_extensions: [c]
                first_line_match: syntax\s+.*
                contexts:
                  main:
                    - match: c
                      scope: c
                    - match: go_a
                      push: scope:source.a#main
                "#, true, None).unwrap();

        builder.add(syntax_c);

        builder.build()
    };

    let mut syntax = syntax_set.find_syntax_by_extension("a").unwrap();
    assert_eq!(syntax.name, "A improved");
    syntax = syntax_set.find_syntax_by_scope(Scope::new(&"source.a").unwrap()).unwrap();
    assert_eq!(syntax.name, "A improved");
    // Both first line regexes match; `C` was added last and therefore wins.
    syntax = syntax_set.find_syntax_by_first_line(&"syntax a").unwrap();
    assert_eq!(syntax.name, "C");

    // `go_a` in `C` must have been linked to the improved `a` (scope `a2`).
    let mut parse_state = ParseState::new(syntax);
    let ops = parse_state.parse_line("c go_a a", &syntax_set);
    let expected = (7, ScopeStackOp::Push(Scope::new("a2").unwrap()));
    assert_ops_contain(&ops, &expected);
}
868
/// Regression test for issue 219: parsing a YAML comment must terminate and
/// produce the comment scope after a round trip through `into_builder` and
/// `build`.
#[test]
fn can_parse_issue219() {
    // Go to builder and back after loading so that build() gets Direct references instead of
    // Named ones. The bug was that Direct references were not handled when marking as
    // "no prototype", so prototype contexts accidentally had the prototype set, which made
    // the parser loop forever.
    let syntax_set = SyntaxSet::load_defaults_newlines().into_builder().build();
    let syntax = syntax_set.find_syntax_by_extension("yaml").unwrap();

    let mut parse_state = ParseState::new(syntax);
    let ops = parse_state.parse_line("# test\n", &syntax_set);
    let expected = (0, ScopeStackOp::Push(Scope::new("comment.line.number-sign.yaml").unwrap()));
    assert_ops_contain(&ops, &expected);
}
883
// A context that the prototype pulls in via `include` must itself not get the
// prototype applied, both on a first build and after a rebuild.
#[test]
fn no_prototype_for_contexts_included_from_prototype() {
    let mut builder = SyntaxSetBuilder::new();
    let syntax = SyntaxDefinition::load_from_str(r#"
        name: Test Prototype
        scope: source.test
        file_extensions: [test]
        contexts:
          prototype:
            - include: included_from_prototype
          main:
            - match: main
            - match: other
              push: other
          other:
            - match: o
          included_from_prototype:
            - match: p
              scope: p
        "#, true, None).unwrap();
    builder.add(syntax);
    let ss = builder.build();

    // "main" and "other" should have a prototype set; "prototype" and
    // "included_from_prototype" must not have a prototype set.
    assert_prototype_only_on(&["main", "other"], &ss, &ss.syntaxes()[0]);

    // Building again should have the same result. The difference is that after the first
    // build(), the references have been replaced with Direct references, so the code needs to
    // handle that correctly.
    let rebuilt = ss.into_builder().build();
    assert_prototype_only_on(&["main", "other"], &rebuilt, &rebuilt.syntaxes()[0]);
}
917
// A context defined inline inside the prototype (the anonymous `push:` list)
// must not get the prototype applied; only "main" should carry it.
#[test]
fn no_prototype_for_contexts_inline_in_prototype() {
    let mut builder = SyntaxSetBuilder::new();
    let syntax = SyntaxDefinition::load_from_str(r#"
        name: Test Prototype
        scope: source.test
        file_extensions: [test]
        contexts:
          prototype:
            - match: p
              push:
                - match: p2
          main:
            - match: main
        "#, true, None).unwrap();
    builder.add(syntax);
    let ss = builder.build();

    // Of the named contexts, only "main" may have a prototype.
    assert_prototype_only_on(&["main"], &ss, &ss.syntaxes()[0]);

    // Same expectation after converting back to a builder and building again.
    let rebuilt = ss.into_builder().build();
    assert_prototype_only_on(&["main"], &rebuilt, &rebuilt.syntaxes()[0]);
}
941
assert_ops_contain( ops: &[(usize, ScopeStackOp)], expected: &(usize, ScopeStackOp) )942 fn assert_ops_contain(
943 ops: &[(usize, ScopeStackOp)],
944 expected: &(usize, ScopeStackOp)
945 ) {
946 assert!(ops.contains(expected),
947 "expected operations to contain {:?}: {:?}", expected, ops);
948 }
949
assert_prototype_only_on(expected: &[&str], syntax_set: &SyntaxSet, syntax: &SyntaxReference)950 fn assert_prototype_only_on(expected: &[&str], syntax_set: &SyntaxSet, syntax: &SyntaxReference) {
951 for (name, id) in &syntax.contexts {
952 if name == "__main" || name == "__start" {
953 // Skip special contexts
954 continue;
955 }
956 let context = syntax_set.get_context(id);
957 if expected.contains(&name.as_str()) {
958 assert!(context.prototype.is_some(), "Expected context {} to have prototype", name);
959 } else {
960 assert!(context.prototype.is_none(), "Expected context {} to not have prototype", name);
961 }
962 }
963 }
964
// Does nothing at runtime; instantiating it with a type only compiles when
// that type is `Send`.
fn check_send<T>()
where
    T: Send,
{
}
966
// Does nothing at runtime; instantiating it with a type only compiles when
// that type is `Sync`.
fn check_sync<T>()
where
    T: Sync,
{
}
968
syntax_a() -> SyntaxDefinition969 fn syntax_a() -> SyntaxDefinition {
970 SyntaxDefinition::load_from_str(
971 r#"
972 name: A
973 scope: source.a
974 file_extensions: [a]
975 contexts:
976 main:
977 - match: 'a'
978 scope: a
979 - match: 'go_b'
980 push: scope:source.b#main
981 "#,
982 true,
983 None,
984 ).unwrap()
985 }
986
syntax_b() -> SyntaxDefinition987 fn syntax_b() -> SyntaxDefinition {
988 SyntaxDefinition::load_from_str(
989 r#"
990 name: B
991 scope: source.b
992 file_extensions: [b]
993 contexts:
994 main:
995 - match: 'b'
996 scope: b
997 "#,
998 true,
999 None,
1000 ).unwrap()
1001 }
1002 }
1003