//! This module generates the AST data types used by rust-analyzer.
2 //!
3 //! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4 //! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
5 
6 use std::{
7     collections::{BTreeSet, HashSet},
8     fmt::Write,
9 };
10 
11 use itertools::Itertools;
12 use proc_macro2::{Punct, Spacing};
13 use quote::{format_ident, quote};
14 use ungrammar::{rust_grammar, Grammar, Rule};
15 
16 use crate::tests::ast_src::{
17     AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC,
18 };
19 
/// Generates the `SyntaxKind` definitions, the AST token wrappers and the AST node
/// wrappers, and hands each generated text together with its target path to
/// `sourcegen::ensure_file_contents`.
#[test]
fn sourcegen_ast() {
    // `SyntaxKind` is derived from the static kinds table alone.
    let kinds_text = generate_syntax_kinds(KINDS_SRC);
    let kinds_path =
        sourcegen::project_root().join("crates/parser/src/syntax_kind/generated.rs");
    sourcegen::ensure_file_contents(kinds_path.as_path(), &kinds_text);

    // Both AST files are derived from the same lowered grammar.
    let ungrammar = rust_grammar();
    let ast_src = lower(&ungrammar);

    let tokens_text = generate_tokens(&ast_src);
    let tokens_path =
        sourcegen::project_root().join("crates/syntax/src/ast/generated/tokens.rs");
    sourcegen::ensure_file_contents(tokens_path.as_path(), &tokens_text);

    let nodes_text = generate_nodes(KINDS_SRC, &ast_src);
    let nodes_path = sourcegen::project_root().join("crates/syntax/src/ast/generated/nodes.rs");
    sourcegen::ensure_file_contents(nodes_path.as_path(), &nodes_text);
}
39 
generate_tokens(grammar: &AstSrc) -> String40 fn generate_tokens(grammar: &AstSrc) -> String {
41     let tokens = grammar.tokens.iter().map(|token| {
42         let name = format_ident!("{}", token);
43         let kind = format_ident!("{}", to_upper_snake_case(token));
44         quote! {
45             #[derive(Debug, Clone, PartialEq, Eq, Hash)]
46             pub struct #name {
47                 pub(crate) syntax: SyntaxToken,
48             }
49             impl std::fmt::Display for #name {
50                 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51                     std::fmt::Display::fmt(&self.syntax, f)
52                 }
53             }
54             impl AstToken for #name {
55                 fn can_cast(kind: SyntaxKind) -> bool { kind == #kind }
56                 fn cast(syntax: SyntaxToken) -> Option<Self> {
57                     if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
58                 }
59                 fn syntax(&self) -> &SyntaxToken { &self.syntax }
60             }
61         }
62     });
63 
64     sourcegen::add_preamble(
65         "sourcegen_ast",
66         sourcegen::reformat(
67             quote! {
68                 use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};
69                 #(#tokens)*
70             }
71             .to_string(),
72         ),
73     )
74     .replace("#[derive", "\n#[derive")
75 }
76 
generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String77 fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> String {
78     let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
79         .nodes
80         .iter()
81         .map(|node| {
82             let name = format_ident!("{}", node.name);
83             let kind = format_ident!("{}", to_upper_snake_case(&node.name));
84             let traits = node
85                 .traits
86                 .iter()
87                 .filter(|trait_name| {
88                     // For loops have two expressions so this might collide, therefor manual impl it
89                     node.name != "ForExpr" || trait_name.as_str() != "HasLoopBody"
90                 })
91                 .map(|trait_name| {
92                     let trait_name = format_ident!("{}", trait_name);
93                     quote!(impl ast::#trait_name for #name {})
94                 });
95 
96             let methods = node.fields.iter().map(|field| {
97                 let method_name = field.method_name();
98                 let ty = field.ty();
99 
100                 if field.is_many() {
101                     quote! {
102                         pub fn #method_name(&self) -> AstChildren<#ty> {
103                             support::children(&self.syntax)
104                         }
105                     }
106                 } else if let Some(token_kind) = field.token_kind() {
107                     quote! {
108                         pub fn #method_name(&self) -> Option<#ty> {
109                             support::token(&self.syntax, #token_kind)
110                         }
111                     }
112                 } else {
113                     quote! {
114                         pub fn #method_name(&self) -> Option<#ty> {
115                             support::child(&self.syntax)
116                         }
117                     }
118                 }
119             });
120             (
121                 quote! {
122                     #[pretty_doc_comment_placeholder_workaround]
123                     #[derive(Debug, Clone, PartialEq, Eq, Hash)]
124                     pub struct #name {
125                         pub(crate) syntax: SyntaxNode,
126                     }
127 
128                     #(#traits)*
129 
130                     impl #name {
131                         #(#methods)*
132                     }
133                 },
134                 quote! {
135                     impl AstNode for #name {
136                         fn can_cast(kind: SyntaxKind) -> bool {
137                             kind == #kind
138                         }
139                         fn cast(syntax: SyntaxNode) -> Option<Self> {
140                             if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
141                         }
142                         fn syntax(&self) -> &SyntaxNode { &self.syntax }
143                     }
144                 },
145             )
146         })
147         .unzip();
148 
149     let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
150         .enums
151         .iter()
152         .map(|en| {
153             let variants: Vec<_> = en.variants.iter().map(|var| format_ident!("{}", var)).collect();
154             let name = format_ident!("{}", en.name);
155             let kinds: Vec<_> = variants
156                 .iter()
157                 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
158                 .collect();
159             let traits = en.traits.iter().map(|trait_name| {
160                 let trait_name = format_ident!("{}", trait_name);
161                 quote!(impl ast::#trait_name for #name {})
162             });
163 
164             let ast_node = if en.name == "Stmt" {
165                 quote! {}
166             } else {
167                 quote! {
168                     impl AstNode for #name {
169                         fn can_cast(kind: SyntaxKind) -> bool {
170                             match kind {
171                                 #(#kinds)|* => true,
172                                 _ => false,
173                             }
174                         }
175                         fn cast(syntax: SyntaxNode) -> Option<Self> {
176                             let res = match syntax.kind() {
177                                 #(
178                                 #kinds => #name::#variants(#variants { syntax }),
179                                 )*
180                                 _ => return None,
181                             };
182                             Some(res)
183                         }
184                         fn syntax(&self) -> &SyntaxNode {
185                             match self {
186                                 #(
187                                 #name::#variants(it) => &it.syntax,
188                                 )*
189                             }
190                         }
191                     }
192                 }
193             };
194 
195             (
196                 quote! {
197                     #[pretty_doc_comment_placeholder_workaround]
198                     #[derive(Debug, Clone, PartialEq, Eq, Hash)]
199                     pub enum #name {
200                         #(#variants(#variants),)*
201                     }
202 
203                     #(#traits)*
204                 },
205                 quote! {
206                     #(
207                         impl From<#variants> for #name {
208                             fn from(node: #variants) -> #name {
209                                 #name::#variants(node)
210                             }
211                         }
212                     )*
213                     #ast_node
214                 },
215             )
216         })
217         .unzip();
218 
219     let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
220         .nodes
221         .iter()
222         .flat_map(|node| node.traits.iter().map(move |t| (t, node)))
223         .into_group_map()
224         .into_iter()
225         .sorted_by_key(|(k, _)| *k)
226         .map(|(trait_name, nodes)| {
227             let name = format_ident!("Any{}", trait_name);
228             let trait_name = format_ident!("{}", trait_name);
229             let kinds: Vec<_> = nodes
230                 .iter()
231                 .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))
232                 .collect();
233 
234             (
235                 quote! {
236                     #[pretty_doc_comment_placeholder_workaround]
237                     #[derive(Debug, Clone, PartialEq, Eq, Hash)]
238                     pub struct #name {
239                         pub(crate) syntax: SyntaxNode,
240                     }
241                     impl ast::#trait_name for #name {}
242                 },
243                 quote! {
244                     impl #name {
245                         #[inline]
246                         pub fn new<T: ast::#trait_name>(node: T) -> #name {
247                             #name {
248                                 syntax: node.syntax().clone()
249                             }
250                         }
251                     }
252                     impl AstNode for #name {
253                         fn can_cast(kind: SyntaxKind) -> bool {
254                             match kind {
255                                 #(#kinds)|* => true,
256                                 _ => false,
257                             }
258                         }
259                         fn cast(syntax: SyntaxNode) -> Option<Self> {
260                             Self::can_cast(syntax.kind()).then(|| #name { syntax })
261                         }
262                         fn syntax(&self) -> &SyntaxNode {
263                             &self.syntax
264                         }
265                     }
266                 },
267             )
268         })
269         .unzip();
270 
271     let enum_names = grammar.enums.iter().map(|it| &it.name);
272     let node_names = grammar.nodes.iter().map(|it| &it.name);
273 
274     let display_impls =
275         enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
276             quote! {
277                 impl std::fmt::Display for #name {
278                     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
279                         std::fmt::Display::fmt(self.syntax(), f)
280                     }
281                 }
282             }
283         });
284 
285     let defined_nodes: HashSet<_> = node_names.collect();
286 
287     for node in kinds
288         .nodes
289         .iter()
290         .map(|kind| to_pascal_case(kind))
291         .filter(|name| !defined_nodes.iter().any(|&it| it == name))
292     {
293         drop(node)
294         // FIXME: restore this
295         // eprintln!("Warning: node {} not defined in ast source", node);
296     }
297 
298     let ast = quote! {
299         use crate::{
300             SyntaxNode, SyntaxToken, SyntaxKind::{self, *},
301             ast::{self, AstNode, AstChildren, support},
302             T,
303         };
304 
305         #(#node_defs)*
306         #(#enum_defs)*
307         #(#any_node_defs)*
308         #(#node_boilerplate_impls)*
309         #(#enum_boilerplate_impls)*
310         #(#any_node_boilerplate_impls)*
311         #(#display_impls)*
312     };
313 
314     let ast = ast.to_string().replace("T ! [", "T![");
315 
316     let mut res = String::with_capacity(ast.len() * 2);
317 
318     let mut docs =
319         grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
320 
321     for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {
322         res.push_str(chunk);
323         if let Some(doc) = docs.next() {
324             write_doc_comment(doc, &mut res);
325         }
326     }
327 
328     let res = sourcegen::add_preamble("sourcegen_ast", sourcegen::reformat(res));
329     res.replace("#[derive", "\n#[derive")
330 }
331 
/// Appends every entry of `contents` to `dest` as its own `///` doc-comment line.
///
/// Each line is written as `///` immediately followed by the entry and a newline;
/// no separating space is inserted.
fn write_doc_comment(contents: &[String], dest: &mut String) {
    contents.iter().for_each(|doc_line| {
        dest.push_str("///");
        dest.push_str(doc_line);
        dest.push('\n');
    });
}
337 
generate_syntax_kinds(grammar: KindsSrc<'_>) -> String338 fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> String {
339     let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
340         .punct
341         .iter()
342         .filter(|(token, _name)| token.len() == 1)
343         .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
344         .unzip();
345 
346     let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
347         if "{}[]()".contains(token) {
348             let c = token.chars().next().unwrap();
349             quote! { #c }
350         } else {
351             let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
352             quote! { #(#cs)* }
353         }
354     });
355     let punctuation =
356         grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
357 
358     let full_keywords_values = &grammar.keywords;
359     let full_keywords =
360         full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(kw)));
361 
362     let contextual_keywords_values = &grammar.contextual_keywords;
363     let contextual_keywords =
364         contextual_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(kw)));
365 
366     let all_keywords_values =
367         grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
368     let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
369     let all_keywords = all_keywords_values
370         .iter()
371         .map(|name| format_ident!("{}_KW", to_upper_snake_case(name)))
372         .collect::<Vec<_>>();
373 
374     let literals =
375         grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
376 
377     let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
378 
379     let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
380 
381     let ast = quote! {
382         #![allow(bad_style, missing_docs, unreachable_pub)]
383         /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
384         #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
385         #[repr(u16)]
386         pub enum SyntaxKind {
387             // Technical SyntaxKinds: they appear temporally during parsing,
388             // but never end up in the final tree
389             #[doc(hidden)]
390             TOMBSTONE,
391             #[doc(hidden)]
392             EOF,
393             #(#punctuation,)*
394             #(#all_keywords,)*
395             #(#literals,)*
396             #(#tokens,)*
397             #(#nodes,)*
398 
399             // Technical kind so that we can cast from u16 safely
400             #[doc(hidden)]
401             __LAST,
402         }
403         use self::SyntaxKind::*;
404 
405         impl SyntaxKind {
406             pub fn is_keyword(self) -> bool {
407                 match self {
408                     #(#all_keywords)|* => true,
409                     _ => false,
410                 }
411             }
412 
413             pub fn is_punct(self) -> bool {
414                 match self {
415                     #(#punctuation)|* => true,
416                     _ => false,
417                 }
418             }
419 
420             pub fn is_literal(self) -> bool {
421                 match self {
422                     #(#literals)|* => true,
423                     _ => false,
424                 }
425             }
426 
427             pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
428                 let kw = match ident {
429                     #(#full_keywords_values => #full_keywords,)*
430                     _ => return None,
431                 };
432                 Some(kw)
433             }
434 
435             pub fn from_contextual_keyword(ident: &str) -> Option<SyntaxKind> {
436                 let kw = match ident {
437                     #(#contextual_keywords_values => #contextual_keywords,)*
438                     _ => return None,
439                 };
440                 Some(kw)
441             }
442 
443             pub fn from_char(c: char) -> Option<SyntaxKind> {
444                 let tok = match c {
445                     #(#single_byte_tokens_values => #single_byte_tokens,)*
446                     _ => return None,
447                 };
448                 Some(tok)
449             }
450         }
451 
452         #[macro_export]
453         macro_rules! T {
454             #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
455             #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)*
456             [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT };
457             [ident] => { $crate::SyntaxKind::IDENT };
458             [shebang] => { $crate::SyntaxKind::SHEBANG };
459         }
460         pub use T;
461     };
462 
463     sourcegen::add_preamble("sourcegen_ast", sourcegen::reformat(ast.to_string()))
464 }
465 
/// Converts a name such as `ForExpr` into `FOR_EXPR`.
///
/// An underscore is inserted before every ASCII uppercase character except the very
/// first character of the input, and every character is ASCII-uppercased.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (idx, ch) in s.chars().enumerate() {
        // The first character never gets a leading separator.
        if idx != 0 && ch.is_ascii_uppercase() {
            out.push('_');
        }
        out.push(ch.to_ascii_uppercase());
    }
    out
}
479 
/// Converts a name such as `GenericParamList` into `generic_param_list`.
///
/// An underscore is inserted before every ASCII uppercase character except the very
/// first character of the input, and every character is ASCII-lowercased.
fn to_lower_snake_case(s: &str) -> String {
    let mut snake = String::with_capacity(s.len());
    let mut rest = s.chars();

    // The leading character is only case-folded, never prefixed with a separator.
    if let Some(first) = rest.next() {
        snake.push(first.to_ascii_lowercase());
    }
    for ch in rest {
        if ch.is_ascii_uppercase() {
            snake.push('_');
        }
        snake.push(ch.to_ascii_lowercase());
    }
    snake
}
493 
/// Converts a name such as `FOR_EXPR` into `ForExpr`.
///
/// Underscores are dropped; the character following an underscore (and the first
/// character of the input) is ASCII-uppercased, all others are ASCII-lowercased.
fn to_pascal_case(s: &str) -> String {
    let mut pascal = String::with_capacity(s.len());
    // Empty segments (leading, trailing or doubled underscores) contribute nothing.
    for word in s.split('_') {
        let mut letters = word.chars();
        if let Some(initial) = letters.next() {
            pascal.push(initial.to_ascii_uppercase());
            pascal.extend(letters.map(|c| c.to_ascii_lowercase()));
        }
    }
    pascal
}
509 
/// Returns `s` with a single `s` appended; no other pluralization rule is applied.
fn pluralize(s: &str) -> String {
    let mut plural = String::with_capacity(s.len() + 1);
    plural.push_str(s);
    plural.push('s');
    plural
}
513 
514 impl Field {
is_many(&self) -> bool515     fn is_many(&self) -> bool {
516         matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
517     }
token_kind(&self) -> Option<proc_macro2::TokenStream>518     fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
519         match self {
520             Field::Token(token) => {
521                 let token: proc_macro2::TokenStream = token.parse().unwrap();
522                 Some(quote! { T![#token] })
523             }
524             _ => None,
525         }
526     }
method_name(&self) -> proc_macro2::Ident527     fn method_name(&self) -> proc_macro2::Ident {
528         match self {
529             Field::Token(name) => {
530                 let name = match name.as_str() {
531                     ";" => "semicolon",
532                     "->" => "thin_arrow",
533                     "'{'" => "l_curly",
534                     "'}'" => "r_curly",
535                     "'('" => "l_paren",
536                     "')'" => "r_paren",
537                     "'['" => "l_brack",
538                     "']'" => "r_brack",
539                     "<" => "l_angle",
540                     ">" => "r_angle",
541                     "=" => "eq",
542                     "!" => "excl",
543                     "*" => "star",
544                     "&" => "amp",
545                     "_" => "underscore",
546                     "." => "dot",
547                     ".." => "dotdot",
548                     "..." => "dotdotdot",
549                     "..=" => "dotdoteq",
550                     "=>" => "fat_arrow",
551                     "@" => "at",
552                     ":" => "colon",
553                     "::" => "coloncolon",
554                     "#" => "pound",
555                     "?" => "question_mark",
556                     "," => "comma",
557                     "|" => "pipe",
558                     "~" => "tilde",
559                     _ => name,
560                 };
561                 format_ident!("{}_token", name)
562             }
563             Field::Node { name, .. } => {
564                 if name == "type" {
565                     format_ident!("ty")
566                 } else {
567                     format_ident!("{}", name)
568                 }
569             }
570         }
571     }
ty(&self) -> proc_macro2::Ident572     fn ty(&self) -> proc_macro2::Ident {
573         match self {
574             Field::Token(_) => format_ident!("SyntaxToken"),
575             Field::Node { ty, .. } => format_ident!("{}", ty),
576         }
577     }
578 }
579 
lower(grammar: &Grammar) -> AstSrc580 fn lower(grammar: &Grammar) -> AstSrc {
581     let mut res = AstSrc {
582         tokens: "Whitespace Comment String ByteString IntNumber FloatNumber Ident"
583             .split_ascii_whitespace()
584             .map(|it| it.to_string())
585             .collect::<Vec<_>>(),
586         ..Default::default()
587     };
588 
589     let nodes = grammar.iter().collect::<Vec<_>>();
590 
591     for &node in &nodes {
592         let name = grammar[node].name.clone();
593         let rule = &grammar[node].rule;
594         match lower_enum(grammar, rule) {
595             Some(variants) => {
596                 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
597                 res.enums.push(enum_src);
598             }
599             None => {
600                 let mut fields = Vec::new();
601                 lower_rule(&mut fields, grammar, None, rule);
602                 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
603             }
604         }
605     }
606 
607     deduplicate_fields(&mut res);
608     extract_enums(&mut res);
609     extract_struct_traits(&mut res);
610     extract_enum_traits(&mut res);
611     res
612 }
613 
lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>>614 fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
615     let alternatives = match rule {
616         Rule::Alt(it) => it,
617         _ => return None,
618     };
619     let mut variants = Vec::new();
620     for alternative in alternatives {
621         match alternative {
622             Rule::Node(it) => variants.push(grammar[*it].name.clone()),
623             Rule::Token(it) if grammar[*it].name == ";" => (),
624             _ => return None,
625         }
626     }
627     Some(variants)
628 }
629 
lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule)630 fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
631     if lower_comma_list(acc, grammar, label, rule) {
632         return;
633     }
634 
635     match rule {
636         Rule::Node(node) => {
637             let ty = grammar[*node].name.clone();
638             let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
639             let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
640             acc.push(field);
641         }
642         Rule::Token(token) => {
643             assert!(label.is_none());
644             let mut name = grammar[*token].name.clone();
645             if name != "int_number" && name != "string" {
646                 if "[]{}()".contains(&name) {
647                     name = format!("'{}'", name);
648                 }
649                 let field = Field::Token(name);
650                 acc.push(field);
651             }
652         }
653         Rule::Rep(inner) => {
654             if let Rule::Node(node) = &**inner {
655                 let ty = grammar[*node].name.clone();
656                 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
657                 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
658                 acc.push(field);
659                 return;
660             }
661             panic!("unhandled rule: {:?}", rule)
662         }
663         Rule::Labeled { label: l, rule } => {
664             assert!(label.is_none());
665             let manually_implemented = matches!(
666                 l.as_str(),
667                 "lhs"
668                     | "rhs"
669                     | "then_branch"
670                     | "else_branch"
671                     | "start"
672                     | "end"
673                     | "op"
674                     | "index"
675                     | "base"
676                     | "value"
677                     | "trait"
678                     | "self_ty"
679             );
680             if manually_implemented {
681                 return;
682             }
683             lower_rule(acc, grammar, Some(l), rule);
684         }
685         Rule::Seq(rules) | Rule::Alt(rules) => {
686             for rule in rules {
687                 lower_rule(acc, grammar, label, rule)
688             }
689         }
690         Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
691     }
692 }
693 
694 // (T (',' T)* ','?)
lower_comma_list( acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule, ) -> bool695 fn lower_comma_list(
696     acc: &mut Vec<Field>,
697     grammar: &Grammar,
698     label: Option<&String>,
699     rule: &Rule,
700 ) -> bool {
701     let rule = match rule {
702         Rule::Seq(it) => it,
703         _ => return false,
704     };
705     let (node, repeat, trailing_comma) = match rule.as_slice() {
706         [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_comma)] => {
707             (node, repeat, trailing_comma)
708         }
709         _ => return false,
710     };
711     let repeat = match &**repeat {
712         Rule::Seq(it) => it,
713         _ => return false,
714     };
715     match repeat.as_slice() {
716         [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (),
717         _ => return false,
718     }
719     let ty = grammar[*node].name.clone();
720     let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
721     let field = Field::Node { name, ty, cardinality: Cardinality::Many };
722     acc.push(field);
723     true
724 }
725 
deduplicate_fields(ast: &mut AstSrc)726 fn deduplicate_fields(ast: &mut AstSrc) {
727     for node in &mut ast.nodes {
728         let mut i = 0;
729         'outer: while i < node.fields.len() {
730             for j in 0..i {
731                 let f1 = &node.fields[i];
732                 let f2 = &node.fields[j];
733                 if f1 == f2 {
734                     node.fields.remove(i);
735                     continue 'outer;
736                 }
737             }
738             i += 1;
739         }
740     }
741 }
742 
extract_enums(ast: &mut AstSrc)743 fn extract_enums(ast: &mut AstSrc) {
744     for node in &mut ast.nodes {
745         for enm in &ast.enums {
746             let mut to_remove = Vec::new();
747             for (i, field) in node.fields.iter().enumerate() {
748                 let ty = field.ty().to_string();
749                 if enm.variants.iter().any(|it| it == &ty) {
750                     to_remove.push(i);
751                 }
752             }
753             if to_remove.len() == enm.variants.len() {
754                 node.remove_field(to_remove);
755                 let ty = enm.name.clone();
756                 let name = to_lower_snake_case(&ty);
757                 node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
758             }
759         }
760     }
761 }
762 
extract_struct_traits(ast: &mut AstSrc)763 fn extract_struct_traits(ast: &mut AstSrc) {
764     let traits: &[(&str, &[&str])] = &[
765         ("HasAttrs", &["attrs"]),
766         ("HasName", &["name"]),
767         ("HasVisibility", &["visibility"]),
768         ("HasGenericParams", &["generic_param_list", "where_clause"]),
769         ("HasTypeBounds", &["type_bound_list", "colon_token"]),
770         ("HasModuleItem", &["items"]),
771         ("HasLoopBody", &["label", "loop_body"]),
772         ("HasArgList", &["arg_list"]),
773     ];
774 
775     for node in &mut ast.nodes {
776         for (name, methods) in traits {
777             extract_struct_trait(node, name, methods);
778         }
779     }
780 }
781 
extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str])782 fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
783     let mut to_remove = Vec::new();
784     for (i, field) in node.fields.iter().enumerate() {
785         let method_name = field.method_name().to_string();
786         if methods.iter().any(|&it| it == method_name) {
787             to_remove.push(i);
788         }
789     }
790     if to_remove.len() == methods.len() {
791         node.traits.push(trait_name.to_string());
792         node.remove_field(to_remove);
793     }
794 }
795 
extract_enum_traits(ast: &mut AstSrc)796 fn extract_enum_traits(ast: &mut AstSrc) {
797     for enm in &mut ast.enums {
798         if enm.name == "Stmt" {
799             continue;
800         }
801         let nodes = &ast.nodes;
802         let mut variant_traits = enm
803             .variants
804             .iter()
805             .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
806             .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
807 
808         let mut enum_traits = match variant_traits.next() {
809             Some(it) => it,
810             None => continue,
811         };
812         for traits in variant_traits {
813             enum_traits = enum_traits.intersection(&traits).cloned().collect();
814         }
815         enm.traits = enum_traits.into_iter().collect();
816     }
817 }
818 
819 impl AstNodeSrc {
remove_field(&mut self, to_remove: Vec<usize>)820     fn remove_field(&mut self, to_remove: Vec<usize>) {
821         to_remove.into_iter().rev().for_each(|idx| {
822             self.fields.remove(idx);
823         });
824     }
825 }
826