1 use super::helpers::{
2     edits::{get_random_edit, invert_edit},
3     fixtures::{fixtures_dir, get_language, get_test_language},
4     random::Rand,
5     scope_sequence::ScopeSequence,
6     EXAMPLE_FILTER, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED, SEED, TRIAL_FILTER,
7 };
8 use crate::{
9     generate,
10     parse::perform_edit,
11     test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
12     util,
13 };
14 use std::{fs, usize};
15 use tree_sitter::{allocations, LogType, Node, Parser, Tree};
16 
/// Number of random edits applied to an example's input in each randomized trial.
const EDIT_COUNT: usize = 3;
/// Number of randomized edit trials (numbered `1..=TRIAL_COUNT`) run per corpus example.
const TRIAL_COUNT: usize = 10;
19 
/// Runs the shared corpus test driver for the `bash` language.
#[test]
fn test_bash_corpus() {
    test_language_corpus("bash");
}
24 
/// Runs the shared corpus test driver for the `c` language.
#[test]
fn test_c_corpus() {
    test_language_corpus("c");
}
29 
/// Runs the shared corpus test driver for the `cpp` language.
#[test]
fn test_cpp_corpus() {
    test_language_corpus("cpp");
}
34 
/// Runs the shared corpus test driver for the `embedded-template` language.
#[test]
fn test_embedded_template_corpus() {
    test_language_corpus("embedded-template");
}
39 
/// Runs the shared corpus test driver for the `go` language.
#[test]
fn test_go_corpus() {
    test_language_corpus("go");
}
44 
/// Runs the shared corpus test driver for the `html` language.
#[test]
fn test_html_corpus() {
    test_language_corpus("html");
}
49 
/// Runs the shared corpus test driver for the `javascript` language.
#[test]
fn test_javascript_corpus() {
    test_language_corpus("javascript");
}
54 
/// Runs the shared corpus test driver for the `json` language.
#[test]
fn test_json_corpus() {
    test_language_corpus("json");
}
59 
/// Runs the shared corpus test driver for the `php` language.
#[test]
fn test_php_corpus() {
    test_language_corpus("php");
}
64 
/// Runs the shared corpus test driver for the `python` language.
#[test]
fn test_python_corpus() {
    test_language_corpus("python");
}
69 
/// Runs the shared corpus test driver for the `ruby` language.
#[test]
fn test_ruby_corpus() {
    test_language_corpus("ruby");
}
74 
/// Runs the shared corpus test driver for the `rust` language.
#[test]
fn test_rust_corpus() {
    test_language_corpus("rust");
}
79 
test_language_corpus(language_name: &str)80 fn test_language_corpus(language_name: &str) {
81     if let Some(language_filter) = LANGUAGE_FILTER.as_ref() {
82         if language_filter != language_name {
83             return;
84         }
85     }
86 
87     let grammars_dir = fixtures_dir().join("grammars");
88     let error_corpus_dir = fixtures_dir().join("error_corpus");
89 
90     let mut failure_count = 0;
91 
92     let language = get_language(language_name);
93     let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
94     if !corpus_dir.is_dir() {
95         corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
96     }
97 
98     let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
99     let main_tests = parse_tests(&corpus_dir).unwrap();
100     let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
101     let mut tests = flatten_tests(main_tests);
102     tests.extend(flatten_tests(error_tests));
103 
104     for (example_name, input, expected_output, has_fields) in tests {
105         println!("  {} example - {}", language_name, example_name);
106 
107         let trial = 0;
108         if TRIAL_FILTER.map_or(true, |t| t == trial) {
109             let passed = allocations::record(|| {
110                 let mut log_session = None;
111                 let mut parser = get_parser(&mut log_session, "log.html");
112                 parser.set_language(language).unwrap();
113                 let tree = parser.parse(&input, None).unwrap();
114                 let mut actual_output = tree.root_node().to_sexp();
115                 if !has_fields {
116                     actual_output = strip_sexp_fields(actual_output);
117                 }
118                 if actual_output == expected_output {
119                     true
120                 } else {
121                     println!(
122                         "Incorrect initial parse for {} - {}",
123                         language_name, example_name,
124                     );
125                     print_diff_key();
126                     print_diff(&actual_output, &expected_output);
127                     println!("");
128                     false
129                 }
130             });
131 
132             if !passed {
133                 failure_count += 1;
134                 continue;
135             }
136         }
137 
138         let mut parser = Parser::new();
139         parser.set_language(language).unwrap();
140         let tree = parser.parse(&input, None).unwrap();
141         drop(parser);
142 
143         for trial in 1..=TRIAL_COUNT {
144             if TRIAL_FILTER.map_or(true, |filter| filter == trial) {
145                 let mut rand = Rand::new(*SEED + trial);
146 
147                 let passed = allocations::record(|| {
148                     let mut log_session = None;
149                     let mut parser = get_parser(&mut log_session, "log.html");
150                     parser.set_language(language).unwrap();
151                     let mut tree = tree.clone();
152                     let mut input = input.clone();
153 
154                     if *LOG_GRAPH_ENABLED {
155                         eprintln!("{}\n", String::from_utf8_lossy(&input));
156                     }
157 
158                     // Perform a random series of edits and reparse.
159                     let mut undo_stack = Vec::new();
160                     for _ in 0..EDIT_COUNT {
161                         let edit = get_random_edit(&mut rand, &input);
162                         undo_stack.push(invert_edit(&input, &edit));
163                         perform_edit(&mut tree, &mut input, &edit);
164                     }
165                     if *LOG_GRAPH_ENABLED {
166                         eprintln!("{}\n", String::from_utf8_lossy(&input));
167                     }
168 
169                     let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
170 
171                     // Check that the new tree is consistent.
172                     check_consistent_sizes(&tree2, &input);
173                     if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
174                         println!(
175                             "\nUnexpected scope change in trial {}\n{}\n\n",
176                             trial, message
177                         );
178                         return false;
179                     }
180 
181                     // Undo all of the edits and re-parse again.
182                     while let Some(edit) = undo_stack.pop() {
183                         perform_edit(&mut tree2, &mut input, &edit);
184                     }
185                     if *LOG_GRAPH_ENABLED {
186                         eprintln!("{}\n", String::from_utf8_lossy(&input));
187                     }
188 
189                     let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
190 
191                     // Verify that the final tree matches the expectation from the corpus.
192                     let mut actual_output = tree3.root_node().to_sexp();
193                     if !has_fields {
194                         actual_output = strip_sexp_fields(actual_output);
195                     }
196 
197                     if actual_output != expected_output {
198                         println!(
199                             "Incorrect parse for {} - {} - trial {}",
200                             language_name, example_name, trial
201                         );
202                         print_diff_key();
203                         print_diff(&actual_output, &expected_output);
204                         println!("");
205                         return false;
206                     }
207 
208                     // Check that the edited tree is consistent.
209                     check_consistent_sizes(&tree3, &input);
210                     if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
211                         eprintln!(
212                             "Unexpected scope change in trial {}\n{}\n\n",
213                             trial, message
214                         );
215                         return false;
216                     }
217 
218                     true
219                 });
220 
221                 if !passed {
222                     failure_count += 1;
223                     break;
224                 }
225             }
226         }
227     }
228 
229     if failure_count > 0 {
230         panic!("{} {} corpus tests failed", failure_count, language_name);
231     }
232 }
233 
/// Generates a parser for every grammar directory under
/// `fixtures/test_grammars` and checks the outcome.
///
/// * If the directory contains `expected_error.txt`, parser generation must
///   fail with exactly that message (line endings normalized to `\n`). These
///   grammars are skipped entirely when `EXAMPLE_FILTER` is set.
/// * Otherwise generation must succeed, and every example in `corpus.txt`
///   must parse to its expected S-expression.
///
/// `LANGUAGE_FILTER` restricts the run to a single grammar directory.
///
/// # Panics
///
/// Panics at the end if any grammar or example failed, and immediately on
/// I/O errors or when a grammar file or corpus cannot be loaded.
#[test]
fn test_feature_corpus_files() {
    let test_grammars_dir = fixtures_dir().join("test_grammars");
    let mut failure_count = 0;

    for entry in fs::read_dir(&test_grammars_dir).unwrap() {
        let entry = entry.unwrap();
        if !entry.metadata().unwrap().is_dir() {
            continue;
        }

        let file_name = entry.file_name();
        let language_name = file_name.to_str().unwrap();

        let filtered_out = LANGUAGE_FILTER
            .as_ref()
            .map_or(false, |filter| language_name != filter.as_str());
        if filtered_out {
            continue;
        }

        let test_path = entry.path();
        // Prefer `grammar.js`; fall back to `grammar.json` when it is absent.
        let grammar_path = {
            let js_path = test_path.join("grammar.js");
            if js_path.exists() {
                js_path
            } else {
                test_path.join("grammar.json")
            }
        };
        let error_message_path = test_path.join("expected_error.txt");
        let grammar_json = generate::load_grammar_file(&grammar_path).unwrap();
        let generate_result = generate::generate_parser_for_grammar(&grammar_json);

        // Grammars that are expected to be rejected by the generator.
        if error_message_path.exists() {
            if EXAMPLE_FILTER.is_some() {
                continue;
            }

            eprintln!("test language: {:?}", language_name);

            let expected_message = fs::read_to_string(&error_message_path)
                .unwrap()
                .replace("\r\n", "\n");
            match generate_result {
                Err(e) => {
                    let actual_message = e.to_string().replace("\r\n", "\n");
                    if expected_message != actual_message {
                        eprintln!(
                            "Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n",
                            expected_message, actual_message
                        );
                        failure_count += 1;
                    }
                }
                Ok(_) => {
                    eprintln!(
                        "Expected error message but got none for test grammar '{}'",
                        language_name
                    );
                    failure_count += 1;
                }
            }
            continue;
        }

        // Grammars that must generate successfully and pass their corpus.
        let generated = match generate_result {
            Ok(generated) => generated,
            Err(e) => {
                eprintln!(
                    "Unexpected error for test grammar '{}':\n{}",
                    language_name, e
                );
                failure_count += 1;
                continue;
            }
        };

        let corpus_path = test_path.join("corpus.txt");
        // The second element of the generation result is the C source that is
        // handed to `get_test_language`.
        let c_code = generated.1;
        let language = get_test_language(language_name, &c_code, Some(&test_path));
        let test = parse_tests(&corpus_path).unwrap();
        let tests = flatten_tests(test);

        if !tests.is_empty() {
            eprintln!("test language: {:?}", language_name);
        }

        for (name, input, expected_output, has_fields) in tests {
            eprintln!("  example: {:?}", name);

            let passed = allocations::record(|| {
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(language).unwrap();
                let tree = parser.parse(&input, None).unwrap();
                let sexp = tree.root_node().to_sexp();
                // Expected output without fields is compared field-free.
                let actual_output = if has_fields {
                    sexp
                } else {
                    strip_sexp_fields(sexp)
                };
                if actual_output != expected_output {
                    print_diff_key();
                    print_diff(&actual_output, &expected_output);
                    println!();
                    return false;
                }
                true
            });

            if !passed {
                failure_count += 1;
            }
        }
    }

    if failure_count > 0 {
        panic!("{} corpus tests failed", failure_count);
    }
}
341 
check_consistent_sizes(tree: &Tree, input: &Vec<u8>)342 fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
343     fn check(node: Node, line_offsets: &Vec<usize>) {
344         let start_byte = node.start_byte();
345         let end_byte = node.end_byte();
346         let start_point = node.start_position();
347         let end_point = node.end_position();
348 
349         assert!(start_byte <= end_byte);
350         assert!(start_point <= end_point);
351         assert_eq!(
352             start_byte,
353             line_offsets[start_point.row] + start_point.column
354         );
355         assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);
356 
357         let mut last_child_end_byte = start_byte;
358         let mut last_child_end_point = start_point;
359         let mut some_child_has_changes = false;
360         let mut actual_named_child_count = 0;
361         for i in 0..node.child_count() {
362             let child = node.child(i).unwrap();
363             assert!(child.start_byte() >= last_child_end_byte);
364             assert!(child.start_position() >= last_child_end_point);
365             check(child, line_offsets);
366             if child.has_changes() {
367                 some_child_has_changes = true;
368             }
369             if child.is_named() {
370                 actual_named_child_count += 1;
371             }
372             last_child_end_byte = child.end_byte();
373             last_child_end_point = child.end_position();
374         }
375 
376         assert_eq!(actual_named_child_count, node.named_child_count());
377 
378         if node.child_count() > 0 {
379             assert!(end_byte >= last_child_end_byte);
380             assert!(end_point >= last_child_end_point);
381         }
382 
383         if some_child_has_changes {
384             assert!(node.has_changes());
385         }
386     }
387 
388     let mut line_offsets = vec![0];
389     for (i, c) in input.iter().enumerate() {
390         if *c == '\n' as u8 {
391             line_offsets.push(i + 1);
392         }
393     }
394 
395     check(tree.root_node(), &line_offsets);
396 }
397 
check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String>398 fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> {
399     let changed_ranges = old_tree.changed_ranges(new_tree).collect();
400     let old_scope_sequence = ScopeSequence::new(old_tree);
401     let new_scope_sequence = ScopeSequence::new(new_tree);
402     old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
403 }
404 
get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser405 fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
406     let mut parser = Parser::new();
407 
408     if *LOG_ENABLED {
409         parser.set_logger(Some(Box::new(|log_type, msg| {
410             if log_type == LogType::Lex {
411                 eprintln!("  {}", msg);
412             } else {
413                 eprintln!("{}", msg);
414             }
415         })));
416     } else if *LOG_GRAPH_ENABLED {
417         *session = Some(util::log_graphs(&mut parser, log_filename).unwrap());
418     }
419 
420     parser
421 }
422 
flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)>423 fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
424     fn helper(
425         test: TestEntry,
426         is_root: bool,
427         prefix: &str,
428         result: &mut Vec<(String, Vec<u8>, String, bool)>,
429     ) {
430         match test {
431             TestEntry::Example {
432                 mut name,
433                 input,
434                 output,
435                 has_fields,
436             } => {
437                 if !prefix.is_empty() {
438                     name.insert_str(0, " - ");
439                     name.insert_str(0, prefix);
440                 }
441                 if let Some(filter) = EXAMPLE_FILTER.as_ref() {
442                     if !name.contains(filter.as_str()) {
443                         return;
444                     }
445                 }
446                 result.push((name, input, output, has_fields));
447             }
448             TestEntry::Group {
449                 mut name, children, ..
450             } => {
451                 if !is_root && !prefix.is_empty() {
452                     name.insert_str(0, " - ");
453                     name.insert_str(0, prefix);
454                 }
455                 for child in children {
456                     helper(child, false, &name, result);
457                 }
458             }
459         }
460     }
461     let mut result = Vec::new();
462     helper(test, true, "", &mut result);
463     result
464 }
465