1 mod binding_files;
2 mod build_tables;
3 mod char_tree;
4 mod dedup;
5 mod grammars;
6 mod nfa;
7 mod node_types;
8 pub mod parse_grammar;
9 mod prepare_grammar;
10 mod render;
11 mod rules;
12 mod tables;
13 
14 use self::build_tables::build_tables;
15 use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
16 use self::parse_grammar::parse_grammar;
17 use self::prepare_grammar::prepare_grammar;
18 use self::render::render_c_code;
19 use self::rules::AliasMap;
20 use anyhow::{anyhow, Context, Result};
21 use lazy_static::lazy_static;
22 use regex::{Regex, RegexBuilder};
23 use std::fs;
24 use std::io::Write;
25 use std::path::{Path, PathBuf};
26 use std::process::{Command, Stdio};
27 
28 lazy_static! {
29     static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
30         .multi_line(true)
31         .build()
32         .unwrap();
33 }
34 
/// The artifacts produced when generating a parser from a grammar.
struct GeneratedParser {
    /// Complete C source for the generated parser (written to `parser.c`).
    c_code: String,
    /// Pretty-printed JSON describing the grammar's node types
    /// (written to `node-types.json`).
    node_types_json: String,
}
39 
generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, next_abi: bool, generate_bindings: bool, report_symbol_name: Option<&str>, ) -> Result<()>40 pub fn generate_parser_in_directory(
41     repo_path: &PathBuf,
42     grammar_path: Option<&str>,
43     next_abi: bool,
44     generate_bindings: bool,
45     report_symbol_name: Option<&str>,
46 ) -> Result<()> {
47     let src_path = repo_path.join("src");
48     let header_path = src_path.join("tree_sitter");
49 
50     // Ensure that the output directories exist.
51     fs::create_dir_all(&src_path)?;
52     fs::create_dir_all(&header_path)?;
53 
54     // Read the grammar.json.
55     let grammar_json;
56     match grammar_path {
57         Some(path) => {
58             grammar_json = load_grammar_file(path.as_ref())?;
59         }
60         None => {
61             let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
62             grammar_json = load_grammar_file(&grammar_js_path)?;
63             fs::write(&src_path.join("grammar.json"), &grammar_json)?;
64         }
65     }
66 
67     // Parse and preprocess the grammar.
68     let input_grammar = parse_grammar(&grammar_json)?;
69     let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
70         prepare_grammar(&input_grammar)?;
71     let language_name = input_grammar.name;
72 
73     // Generate the parser and related files.
74     let GeneratedParser {
75         c_code,
76         node_types_json,
77     } = generate_parser_for_grammar_with_opts(
78         &language_name,
79         syntax_grammar,
80         lexical_grammar,
81         inlines,
82         simple_aliases,
83         next_abi,
84         report_symbol_name,
85     )?;
86 
87     write_file(&src_path.join("parser.c"), c_code)?;
88     write_file(&src_path.join("node-types.json"), node_types_json)?;
89 
90     if next_abi {
91         write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
92     }
93 
94     if generate_bindings {
95         binding_files::generate_binding_files(&repo_path, &language_name)?;
96     }
97 
98     Ok(())
99 }
100 
generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)>101 pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
102     let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
103     let input_grammar = parse_grammar(&grammar_json)?;
104     let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
105         prepare_grammar(&input_grammar)?;
106     let parser = generate_parser_for_grammar_with_opts(
107         &input_grammar.name,
108         syntax_grammar,
109         lexical_grammar,
110         inlines,
111         simple_aliases,
112         true,
113         None,
114     )?;
115     Ok((input_grammar.name, parser.c_code))
116 }
117 
/// Shared implementation of parser generation: compute variable/node-type
/// information, build the parse and lex tables, and render everything to C.
///
/// Returns the rendered C source together with the pretty-printed
/// `node-types.json` contents.
fn generate_parser_for_grammar_with_opts(
    name: &String,
    syntax_grammar: SyntaxGrammar,
    lexical_grammar: LexicalGrammar,
    inlines: InlinedProductionMap,
    simple_aliases: AliasMap,
    next_abi: bool,
    report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
    // Per-variable info is needed both for node-types.json generation and
    // for table construction below.
    let variable_info =
        node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
    let node_types_json = node_types::generate_node_types_json(
        &syntax_grammar,
        &lexical_grammar,
        &simple_aliases,
        &variable_info,
    );
    // `report_symbol_name` is forwarded to table construction for
    // diagnostic reporting.
    let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
        &syntax_grammar,
        &lexical_grammar,
        &simple_aliases,
        &variable_info,
        &inlines,
        report_symbol_name,
    )?;
    // Render the tables into a single C source file. Ownership of the
    // grammars and alias map moves into the renderer here.
    let c_code = render_c_code(
        name,
        parse_table,
        main_lex_table,
        keyword_lex_table,
        keyword_capture_token,
        syntax_grammar,
        lexical_grammar,
        simple_aliases,
        next_abi,
    );
    Ok(GeneratedParser {
        c_code,
        // NOTE(review): serializing our own in-memory JSON value is assumed
        // infallible here, hence the `unwrap`.
        node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
    })
}
159 
load_grammar_file(grammar_path: &Path) -> Result<String>160 pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
161     match grammar_path.extension().and_then(|e| e.to_str()) {
162         Some("js") => Ok(load_js_grammar_file(grammar_path)?),
163         Some("json") => Ok(fs::read_to_string(grammar_path)?),
164         _ => Err(anyhow!(
165             "Unknown grammar file extension: {:?}",
166             grammar_path
167         )),
168     }
169 }
170 
load_js_grammar_file(grammar_path: &Path) -> Result<String>171 fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
172     let grammar_path = fs::canonicalize(grammar_path)?;
173     let mut node_process = Command::new("node")
174         .env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
175         .stdin(Stdio::piped())
176         .stdout(Stdio::piped())
177         .spawn()
178         .expect("Failed to run `node`");
179 
180     let mut node_stdin = node_process
181         .stdin
182         .take()
183         .expect("Failed to open stdin for node");
184     let javascript_code = include_bytes!("./dsl.js");
185     node_stdin
186         .write(javascript_code)
187         .expect("Failed to write to node's stdin");
188     drop(node_stdin);
189     let output = node_process
190         .wait_with_output()
191         .expect("Failed to read output from node");
192     match output.status.code() {
193         None => panic!("Node process was killed"),
194         Some(0) => {}
195         Some(code) => return Err(anyhow!("Node process exited with status {}", code)),
196     }
197 
198     let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
199     result.push('\n');
200     Ok(result)
201 }
202 
write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()>203 fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> {
204     fs::write(path, body)
205         .with_context(|| format!("Failed to write {:?}", path.file_name().unwrap()))
206 }
207