1 use super::helpers::{
2 edits::{get_random_edit, invert_edit},
3 fixtures::{fixtures_dir, get_language, get_test_language},
4 random::Rand,
5 scope_sequence::ScopeSequence,
6 EXAMPLE_FILTER, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED, SEED, TRIAL_FILTER,
7 };
8 use crate::{
9 generate,
10 parse::perform_edit,
11 test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
12 util,
13 };
14 use std::{fs, usize};
15 use tree_sitter::{allocations, LogType, Node, Parser, Tree};
16
17 const EDIT_COUNT: usize = 3;
18 const TRIAL_COUNT: usize = 10;
19
/// Runs the shared corpus checks against the `bash` grammar fixture.
#[test]
fn test_bash_corpus() {
    let language_name = "bash";
    test_language_corpus(language_name);
}
24
/// Runs the shared corpus checks against the `c` grammar fixture.
#[test]
fn test_c_corpus() {
    let language_name = "c";
    test_language_corpus(language_name);
}
29
/// Runs the shared corpus checks against the `cpp` grammar fixture.
#[test]
fn test_cpp_corpus() {
    let language_name = "cpp";
    test_language_corpus(language_name);
}
34
/// Runs the shared corpus checks against the `embedded-template` grammar fixture.
#[test]
fn test_embedded_template_corpus() {
    let language_name = "embedded-template";
    test_language_corpus(language_name);
}
39
/// Runs the shared corpus checks against the `go` grammar fixture.
#[test]
fn test_go_corpus() {
    let language_name = "go";
    test_language_corpus(language_name);
}
44
/// Runs the shared corpus checks against the `html` grammar fixture.
#[test]
fn test_html_corpus() {
    let language_name = "html";
    test_language_corpus(language_name);
}
49
/// Runs the shared corpus checks against the `javascript` grammar fixture.
#[test]
fn test_javascript_corpus() {
    let language_name = "javascript";
    test_language_corpus(language_name);
}
54
/// Runs the shared corpus checks against the `json` grammar fixture.
#[test]
fn test_json_corpus() {
    let language_name = "json";
    test_language_corpus(language_name);
}
59
/// Runs the shared corpus checks against the `php` grammar fixture.
#[test]
fn test_php_corpus() {
    let language_name = "php";
    test_language_corpus(language_name);
}
64
/// Runs the shared corpus checks against the `python` grammar fixture.
#[test]
fn test_python_corpus() {
    let language_name = "python";
    test_language_corpus(language_name);
}
69
/// Runs the shared corpus checks against the `ruby` grammar fixture.
#[test]
fn test_ruby_corpus() {
    let language_name = "ruby";
    test_language_corpus(language_name);
}
74
/// Runs the shared corpus checks against the `rust` grammar fixture.
#[test]
fn test_rust_corpus() {
    let language_name = "rust";
    test_language_corpus(language_name);
}
79
test_language_corpus(language_name: &str)80 fn test_language_corpus(language_name: &str) {
81 if let Some(language_filter) = LANGUAGE_FILTER.as_ref() {
82 if language_filter != language_name {
83 return;
84 }
85 }
86
87 let grammars_dir = fixtures_dir().join("grammars");
88 let error_corpus_dir = fixtures_dir().join("error_corpus");
89
90 let mut failure_count = 0;
91
92 let language = get_language(language_name);
93 let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
94 if !corpus_dir.is_dir() {
95 corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
96 }
97
98 let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
99 let main_tests = parse_tests(&corpus_dir).unwrap();
100 let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
101 let mut tests = flatten_tests(main_tests);
102 tests.extend(flatten_tests(error_tests));
103
104 for (example_name, input, expected_output, has_fields) in tests {
105 println!(" {} example - {}", language_name, example_name);
106
107 let trial = 0;
108 if TRIAL_FILTER.map_or(true, |t| t == trial) {
109 let passed = allocations::record(|| {
110 let mut log_session = None;
111 let mut parser = get_parser(&mut log_session, "log.html");
112 parser.set_language(language).unwrap();
113 let tree = parser.parse(&input, None).unwrap();
114 let mut actual_output = tree.root_node().to_sexp();
115 if !has_fields {
116 actual_output = strip_sexp_fields(actual_output);
117 }
118 if actual_output == expected_output {
119 true
120 } else {
121 println!(
122 "Incorrect initial parse for {} - {}",
123 language_name, example_name,
124 );
125 print_diff_key();
126 print_diff(&actual_output, &expected_output);
127 println!("");
128 false
129 }
130 });
131
132 if !passed {
133 failure_count += 1;
134 continue;
135 }
136 }
137
138 let mut parser = Parser::new();
139 parser.set_language(language).unwrap();
140 let tree = parser.parse(&input, None).unwrap();
141 drop(parser);
142
143 for trial in 1..=TRIAL_COUNT {
144 if TRIAL_FILTER.map_or(true, |filter| filter == trial) {
145 let mut rand = Rand::new(*SEED + trial);
146
147 let passed = allocations::record(|| {
148 let mut log_session = None;
149 let mut parser = get_parser(&mut log_session, "log.html");
150 parser.set_language(language).unwrap();
151 let mut tree = tree.clone();
152 let mut input = input.clone();
153
154 if *LOG_GRAPH_ENABLED {
155 eprintln!("{}\n", String::from_utf8_lossy(&input));
156 }
157
158 // Perform a random series of edits and reparse.
159 let mut undo_stack = Vec::new();
160 for _ in 0..EDIT_COUNT {
161 let edit = get_random_edit(&mut rand, &input);
162 undo_stack.push(invert_edit(&input, &edit));
163 perform_edit(&mut tree, &mut input, &edit);
164 }
165 if *LOG_GRAPH_ENABLED {
166 eprintln!("{}\n", String::from_utf8_lossy(&input));
167 }
168
169 let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
170
171 // Check that the new tree is consistent.
172 check_consistent_sizes(&tree2, &input);
173 if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
174 println!(
175 "\nUnexpected scope change in trial {}\n{}\n\n",
176 trial, message
177 );
178 return false;
179 }
180
181 // Undo all of the edits and re-parse again.
182 while let Some(edit) = undo_stack.pop() {
183 perform_edit(&mut tree2, &mut input, &edit);
184 }
185 if *LOG_GRAPH_ENABLED {
186 eprintln!("{}\n", String::from_utf8_lossy(&input));
187 }
188
189 let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
190
191 // Verify that the final tree matches the expectation from the corpus.
192 let mut actual_output = tree3.root_node().to_sexp();
193 if !has_fields {
194 actual_output = strip_sexp_fields(actual_output);
195 }
196
197 if actual_output != expected_output {
198 println!(
199 "Incorrect parse for {} - {} - trial {}",
200 language_name, example_name, trial
201 );
202 print_diff_key();
203 print_diff(&actual_output, &expected_output);
204 println!("");
205 return false;
206 }
207
208 // Check that the edited tree is consistent.
209 check_consistent_sizes(&tree3, &input);
210 if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
211 eprintln!(
212 "Unexpected scope change in trial {}\n{}\n\n",
213 trial, message
214 );
215 return false;
216 }
217
218 true
219 });
220
221 if !passed {
222 failure_count += 1;
223 break;
224 }
225 }
226 }
227 }
228
229 if failure_count > 0 {
230 panic!("{} {} corpus tests failed", failure_count, language_name);
231 }
232 }
233
/// Generates a parser for every grammar directory under the `test_grammars` fixture
/// directory and checks the outcome.
///
/// Directories not matching `LANGUAGE_FILTER` (when set) are skipped. The grammar is
/// loaded from `grammar.js`, or from `grammar.json` when the former does not exist.
///
/// - If the directory contains `expected_error.txt`, generation must fail with exactly
///   that message (compared after normalizing `\r\n` to `\n`). These grammars are
///   skipped entirely when `EXAMPLE_FILTER` is set.
/// - Otherwise generation must succeed, and every example in `corpus.txt` must parse to
///   its expected S-expression. Each example is parsed inside `allocations::record`.
///
/// # Panics
///
/// Panics after all directories have been processed if any check failed, and
/// immediately if a fixture cannot be read.
#[test]
fn test_feature_corpus_files() {
    let test_grammars_dir = fixtures_dir().join("test_grammars");
    let mut failure_count = 0;

    for entry in fs::read_dir(&test_grammars_dir).unwrap() {
        let entry = entry.unwrap();
        if !entry.metadata().unwrap().is_dir() {
            continue;
        }

        let dir_name = entry.file_name();
        let language_name = dir_name.to_str().unwrap();

        let filtered_out = LANGUAGE_FILTER
            .as_ref()
            .map_or(false, |filter| language_name != filter.as_str());
        if filtered_out {
            continue;
        }

        let test_path = entry.path();
        let js_grammar_path = test_path.join("grammar.js");
        let grammar_path = if js_grammar_path.exists() {
            js_grammar_path
        } else {
            test_path.join("grammar.json")
        };
        let error_message_path = test_path.join("expected_error.txt");
        let grammar_json = generate::load_grammar_file(&grammar_path).unwrap();
        let generate_result = generate::generate_parser_for_grammar(&grammar_json);

        // Grammars that are expected to be rejected by the generator.
        if error_message_path.exists() {
            if EXAMPLE_FILTER.is_some() {
                continue;
            }

            eprintln!("test language: {:?}", language_name);

            let expected_message = fs::read_to_string(&error_message_path)
                .unwrap()
                .replace("\r\n", "\n");
            match generate_result {
                Err(e) => {
                    let actual_message = e.to_string().replace("\r\n", "\n");
                    if expected_message != actual_message {
                        eprintln!(
                            "Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n",
                            expected_message, actual_message
                        );
                        failure_count += 1;
                    }
                }
                Ok(_) => {
                    eprintln!(
                        "Expected error message but got none for test grammar '{}'",
                        language_name
                    );
                    failure_count += 1;
                }
            }
            continue;
        }

        // Grammars that must generate cleanly; the second tuple field is the C source.
        let c_code = match generate_result {
            Ok(generated) => generated.1,
            Err(e) => {
                eprintln!(
                    "Unexpected error for test grammar '{}':\n{}",
                    language_name, e
                );
                failure_count += 1;
                continue;
            }
        };

        let corpus_path = test_path.join("corpus.txt");
        let language = get_test_language(language_name, &c_code, Some(&test_path));
        let tests = flatten_tests(parse_tests(&corpus_path).unwrap());

        if !tests.is_empty() {
            eprintln!("test language: {:?}", language_name);
        }

        for (name, input, expected_output, has_fields) in tests {
            eprintln!(" example: {:?}", name);

            let passed = allocations::record(|| {
                // Declared before the parser so the session outlives it.
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(language).unwrap();
                let tree = parser.parse(&input, None).unwrap();

                let sexp = tree.root_node().to_sexp();
                let actual_output = if has_fields {
                    sexp
                } else {
                    strip_sexp_fields(sexp)
                };

                let matches = actual_output == expected_output;
                if !matches {
                    print_diff_key();
                    print_diff(&actual_output, &expected_output);
                    println!("");
                }
                matches
            });

            if !passed {
                failure_count += 1;
            }
        }
    }

    if failure_count > 0 {
        panic!("{} corpus tests failed", failure_count);
    }
}
341
check_consistent_sizes(tree: &Tree, input: &Vec<u8>)342 fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
343 fn check(node: Node, line_offsets: &Vec<usize>) {
344 let start_byte = node.start_byte();
345 let end_byte = node.end_byte();
346 let start_point = node.start_position();
347 let end_point = node.end_position();
348
349 assert!(start_byte <= end_byte);
350 assert!(start_point <= end_point);
351 assert_eq!(
352 start_byte,
353 line_offsets[start_point.row] + start_point.column
354 );
355 assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);
356
357 let mut last_child_end_byte = start_byte;
358 let mut last_child_end_point = start_point;
359 let mut some_child_has_changes = false;
360 let mut actual_named_child_count = 0;
361 for i in 0..node.child_count() {
362 let child = node.child(i).unwrap();
363 assert!(child.start_byte() >= last_child_end_byte);
364 assert!(child.start_position() >= last_child_end_point);
365 check(child, line_offsets);
366 if child.has_changes() {
367 some_child_has_changes = true;
368 }
369 if child.is_named() {
370 actual_named_child_count += 1;
371 }
372 last_child_end_byte = child.end_byte();
373 last_child_end_point = child.end_position();
374 }
375
376 assert_eq!(actual_named_child_count, node.named_child_count());
377
378 if node.child_count() > 0 {
379 assert!(end_byte >= last_child_end_byte);
380 assert!(end_point >= last_child_end_point);
381 }
382
383 if some_child_has_changes {
384 assert!(node.has_changes());
385 }
386 }
387
388 let mut line_offsets = vec![0];
389 for (i, c) in input.iter().enumerate() {
390 if *c == '\n' as u8 {
391 line_offsets.push(i + 1);
392 }
393 }
394
395 check(tree.root_node(), &line_offsets);
396 }
397
check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String>398 fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> {
399 let changed_ranges = old_tree.changed_ranges(new_tree).collect();
400 let old_scope_sequence = ScopeSequence::new(old_tree);
401 let new_scope_sequence = ScopeSequence::new(new_tree);
402 old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
403 }
404
get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser405 fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
406 let mut parser = Parser::new();
407
408 if *LOG_ENABLED {
409 parser.set_logger(Some(Box::new(|log_type, msg| {
410 if log_type == LogType::Lex {
411 eprintln!(" {}", msg);
412 } else {
413 eprintln!("{}", msg);
414 }
415 })));
416 } else if *LOG_GRAPH_ENABLED {
417 *session = Some(util::log_graphs(&mut parser, log_filename).unwrap());
418 }
419
420 parser
421 }
422
flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)>423 fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
424 fn helper(
425 test: TestEntry,
426 is_root: bool,
427 prefix: &str,
428 result: &mut Vec<(String, Vec<u8>, String, bool)>,
429 ) {
430 match test {
431 TestEntry::Example {
432 mut name,
433 input,
434 output,
435 has_fields,
436 } => {
437 if !prefix.is_empty() {
438 name.insert_str(0, " - ");
439 name.insert_str(0, prefix);
440 }
441 if let Some(filter) = EXAMPLE_FILTER.as_ref() {
442 if !name.contains(filter.as_str()) {
443 return;
444 }
445 }
446 result.push((name, input, output, has_fields));
447 }
448 TestEntry::Group {
449 mut name, children, ..
450 } => {
451 if !is_root && !prefix.is_empty() {
452 name.insert_str(0, " - ");
453 name.insert_str(0, prefix);
454 }
455 for child in children {
456 helper(child, false, &name, result);
457 }
458 }
459 }
460 }
461 let mut result = Vec::new();
462 helper(test, true, "", &mut result);
463 result
464 }
465