1 mod expand_repeats;
2 mod expand_tokens;
3 mod extract_default_aliases;
4 mod extract_tokens;
5 mod flatten_grammar;
6 mod intern_symbols;
7 mod process_inlines;
8 
9 pub(crate) use self::expand_tokens::expand_tokens;
10 
11 use self::expand_repeats::expand_repeats;
12 use self::extract_default_aliases::extract_default_aliases;
13 use self::extract_tokens::extract_tokens;
14 use self::flatten_grammar::flatten_grammar;
15 use self::intern_symbols::intern_symbols;
16 use self::process_inlines::process_inlines;
17 use super::grammars::{
18     ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
19     SyntaxGrammar, Variable,
20 };
21 use super::rules::{AliasMap, Precedence, Rule, Symbol};
22 use anyhow::{anyhow, Result};
23 use std::{
24     cmp::Ordering,
25     collections::{hash_map, HashMap, HashSet},
26     mem,
27 };
28 
29 pub(crate) struct IntermediateGrammar<T, U> {
30     variables: Vec<Variable>,
31     extra_symbols: Vec<T>,
32     expected_conflicts: Vec<Vec<Symbol>>,
33     precedence_orderings: Vec<Vec<PrecedenceEntry>>,
34     external_tokens: Vec<U>,
35     variables_to_inline: Vec<Symbol>,
36     supertype_symbols: Vec<Symbol>,
37     word_token: Option<Symbol>,
38 }
39 
40 pub(crate) type InternedGrammar = IntermediateGrammar<Rule, Variable>;
41 
42 pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
43 
44 #[derive(Debug, PartialEq, Eq)]
45 pub(crate) struct ExtractedLexicalGrammar {
46     pub variables: Vec<Variable>,
47     pub separators: Vec<Rule>,
48 }
49 
50 /// Transform an input grammar into separate components that are ready
51 /// for parse table construction.
prepare_grammar( input_grammar: &InputGrammar, ) -> Result<( SyntaxGrammar, LexicalGrammar, InlinedProductionMap, AliasMap, )>52 pub(crate) fn prepare_grammar(
53     input_grammar: &InputGrammar,
54 ) -> Result<(
55     SyntaxGrammar,
56     LexicalGrammar,
57     InlinedProductionMap,
58     AliasMap,
59 )> {
60     validate_precedences(input_grammar)?;
61 
62     let interned_grammar = intern_symbols(input_grammar)?;
63     let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
64     let syntax_grammar = expand_repeats(syntax_grammar);
65     let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
66     let lexical_grammar = expand_tokens(lexical_grammar)?;
67     let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
68     let inlines = process_inlines(&syntax_grammar, &lexical_grammar)?;
69     Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
70 }
71 
72 /// Check that all of the named precedences used in the grammar are declared
73 /// within the `precedences` lists, and also that there are no conflicting
74 /// precedence orderings declared in those lists.
validate_precedences(grammar: &InputGrammar) -> Result<()>75 fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
76     // For any two precedence names `a` and `b`, if `a` comes before `b`
77     // in some list, then it cannot come *after* `b` in any list.
78     let mut pairs = HashMap::new();
79     for list in &grammar.precedence_orderings {
80         for (i, mut entry1) in list.iter().enumerate() {
81             for mut entry2 in list.iter().skip(i + 1) {
82                 if entry2 == entry1 {
83                     continue;
84                 }
85                 let mut ordering = Ordering::Greater;
86                 if entry1 > entry2 {
87                     ordering = Ordering::Less;
88                     mem::swap(&mut entry1, &mut entry2);
89                 }
90                 match pairs.entry((entry1, entry2)) {
91                     hash_map::Entry::Vacant(e) => {
92                         e.insert(ordering);
93                     }
94                     hash_map::Entry::Occupied(e) => {
95                         if e.get() != &ordering {
96                             return Err(anyhow!(
97                                 "Conflicting orderings for precedences {} and {}",
98                                 entry1,
99                                 entry2
100                             ));
101                         }
102                     }
103                 }
104             }
105         }
106     }
107 
108     // Check that no rule contains a named precedence that is not present in
109     // any of the `precedences` lists.
110     fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> {
111         match rule {
112             Rule::Repeat(rule) => validate(rule_name, rule, names),
113             Rule::Seq(elements) | Rule::Choice(elements) => elements
114                 .iter()
115                 .map(|e| validate(rule_name, e, names))
116                 .collect(),
117             Rule::Metadata { rule, params } => {
118                 if let Precedence::Name(n) = &params.precedence {
119                     if !names.contains(n) {
120                         return Err(anyhow!(
121                             "Undeclared precedence '{}' in rule '{}'",
122                             n,
123                             rule_name
124                         ));
125                     }
126                 }
127                 validate(rule_name, rule, names)?;
128                 Ok(())
129             }
130             _ => Ok(()),
131         }
132     }
133 
134     let precedence_names = grammar
135         .precedence_orderings
136         .iter()
137         .flat_map(|l| l.iter())
138         .filter_map(|p| {
139             if let PrecedenceEntry::Name(n) = p {
140                 Some(n)
141             } else {
142                 None
143             }
144         })
145         .collect::<HashSet<&String>>();
146     for variable in &grammar.variables {
147         validate(&variable.name, &variable.rule, &precedence_names)?;
148     }
149 
150     Ok(())
151 }
152 
153 #[cfg(test)]
154 mod tests {
155     use super::*;
156     use crate::generate::grammars::{InputGrammar, Variable, VariableType};
157 
158     #[test]
test_validate_precedences_with_undeclared_precedence()159     fn test_validate_precedences_with_undeclared_precedence() {
160         let grammar = InputGrammar {
161             name: String::new(),
162             word_token: None,
163             extra_symbols: vec![],
164             external_tokens: vec![],
165             supertype_symbols: vec![],
166             expected_conflicts: vec![],
167             variables_to_inline: vec![],
168             precedence_orderings: vec![
169                 vec![
170                     PrecedenceEntry::Name("a".to_string()),
171                     PrecedenceEntry::Name("b".to_string()),
172                 ],
173                 vec![
174                     PrecedenceEntry::Name("b".to_string()),
175                     PrecedenceEntry::Name("c".to_string()),
176                     PrecedenceEntry::Name("d".to_string()),
177                 ],
178             ],
179             variables: vec![
180                 Variable {
181                     name: "v1".to_string(),
182                     kind: VariableType::Named,
183                     rule: Rule::Seq(vec![
184                         Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
185                         Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
186                     ]),
187                 },
188                 Variable {
189                     name: "v2".to_string(),
190                     kind: VariableType::Named,
191                     rule: Rule::repeat(Rule::Choice(vec![
192                         Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")),
193                         Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
194                     ])),
195                 },
196             ],
197         };
198 
199         let result = validate_precedences(&grammar);
200         assert_eq!(
201             result.unwrap_err().to_string(),
202             "Undeclared precedence 'omg' in rule 'v2'",
203         );
204     }
205 
206     #[test]
test_validate_precedences_with_conflicting_order()207     fn test_validate_precedences_with_conflicting_order() {
208         let grammar = InputGrammar {
209             name: String::new(),
210             word_token: None,
211             extra_symbols: vec![],
212             external_tokens: vec![],
213             supertype_symbols: vec![],
214             expected_conflicts: vec![],
215             variables_to_inline: vec![],
216             precedence_orderings: vec![
217                 vec![
218                     PrecedenceEntry::Name("a".to_string()),
219                     PrecedenceEntry::Name("b".to_string()),
220                 ],
221                 vec![
222                     PrecedenceEntry::Name("b".to_string()),
223                     PrecedenceEntry::Name("c".to_string()),
224                     PrecedenceEntry::Name("a".to_string()),
225                 ],
226             ],
227             variables: vec![
228                 Variable {
229                     name: "v1".to_string(),
230                     kind: VariableType::Named,
231                     rule: Rule::Seq(vec![
232                         Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
233                         Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
234                     ]),
235                 },
236                 Variable {
237                     name: "v2".to_string(),
238                     kind: VariableType::Named,
239                     rule: Rule::repeat(Rule::Choice(vec![
240                         Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")),
241                         Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
242                     ])),
243                 },
244             ],
245         };
246 
247         let result = validate_precedences(&grammar);
248         assert_eq!(
249             result.unwrap_err().to_string(),
250             "Conflicting orderings for precedences 'a' and 'b'",
251         );
252     }
253 }
254