1import unittest
2
3from test import test_tools
4from typing import Dict, Set
5
6test_tools.skip_if_missing('peg_generator')
7with test_tools.imports_under_tool('peg_generator'):
8    from pegen.grammar_parser import GeneratedParser as GrammarParser
9    from pegen.testutil import parse_string
10    from pegen.first_sets import FirstSetCalculator
11    from pegen.grammar import Grammar
12
13
14class TestFirstSets(unittest.TestCase):
15    def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
16        grammar: Grammar = parse_string(grammar_source, GrammarParser)
17        return FirstSetCalculator(grammar.rules).calculate()
18
19    def test_alternatives(self) -> None:
20        grammar = """
21            start: expr NEWLINE? ENDMARKER
22            expr: A | B
23            A: 'a' | '-'
24            B: 'b' | '+'
25        """
26        self.assertEqual(self.calculate_first_sets(grammar), {
27            "A": {"'a'", "'-'"},
28            "B": {"'+'", "'b'"},
29            "expr": {"'+'", "'a'", "'b'", "'-'"},
30            "start": {"'+'", "'a'", "'b'", "'-'"},
31        })
32
33    def test_optionals(self) -> None:
34        grammar = """
35            start: expr NEWLINE
36            expr: ['a'] ['b'] 'c'
37        """
38        self.assertEqual(self.calculate_first_sets(grammar), {
39            "expr": {"'c'", "'a'", "'b'"},
40            "start": {"'c'", "'a'", "'b'"},
41        })
42
43    def test_repeat_with_separator(self) -> None:
44        grammar = """
45        start: ','.thing+ NEWLINE
46        thing: NUMBER
47        """
48        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
49
50    def test_optional_operator(self) -> None:
51        grammar = """
52        start: sum NEWLINE
53        sum: (term)? 'b'
54        term: NUMBER
55        """
56        self.assertEqual(self.calculate_first_sets(grammar), {
57            "term": {"NUMBER"},
58            "sum": {"NUMBER", "'b'"},
59            "start": {"'b'", "NUMBER"},
60        })
61
62    def test_optional_literal(self) -> None:
63        grammar = """
64        start: sum NEWLINE
65        sum: '+' ? term
66        term: NUMBER
67        """
68        self.assertEqual(self.calculate_first_sets(grammar), {
69            "term": {"NUMBER"},
70            "sum": {"'+'", "NUMBER"},
71            "start": {"'+'", "NUMBER"},
72        })
73
74    def test_optional_after(self) -> None:
75        grammar = """
76        start: term NEWLINE
77        term: NUMBER ['+']
78        """
79        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
80
81    def test_optional_before(self) -> None:
82        grammar = """
83        start: term NEWLINE
84        term: ['+'] NUMBER
85        """
86        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
87
88    def test_repeat_0(self) -> None:
89        grammar = """
90        start: thing* "+" NEWLINE
91        thing: NUMBER
92        """
93        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
94
95    def test_repeat_0_with_group(self) -> None:
96        grammar = """
97        start: ('+' '-')* term NEWLINE
98        term: NUMBER
99        """
100        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
101
102    def test_repeat_1(self) -> None:
103        grammar = """
104        start: thing+ '-' NEWLINE
105        thing: NUMBER
106        """
107        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
108
109    def test_repeat_1_with_group(self) -> None:
110        grammar = """
111        start: ('+' term)+ term NEWLINE
112        term: NUMBER
113        """
114        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
115
116    def test_gather(self) -> None:
117        grammar = """
118        start: ','.thing+ NEWLINE
119        thing: NUMBER
120        """
121        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
122
123    def test_positive_lookahead(self) -> None:
124        grammar = """
125        start: expr NEWLINE
126        expr: &'a' opt
127        opt: 'a' | 'b' | 'c'
128        """
129        self.assertEqual(self.calculate_first_sets(grammar), {
130            "expr": {"'a'"},
131            "start": {"'a'"},
132            "opt": {"'b'", "'c'", "'a'"},
133        })
134
135    def test_negative_lookahead(self) -> None:
136        grammar = """
137        start: expr NEWLINE
138        expr: !'a' opt
139        opt: 'a' | 'b' | 'c'
140        """
141        self.assertEqual(self.calculate_first_sets(grammar), {
142            "opt": {"'b'", "'a'", "'c'"},
143            "expr": {"'b'", "'c'"},
144            "start": {"'b'", "'c'"},
145        })
146
147    def test_left_recursion(self) -> None:
148        grammar = """
149        start: expr NEWLINE
150        expr: ('-' term | expr '+' term | term)
151        term: NUMBER
152        foo: 'foo'
153        bar: 'bar'
154        baz: 'baz'
155        """
156        self.assertEqual(self.calculate_first_sets(grammar), {
157            "expr": {"NUMBER", "'-'"},
158            "term": {"NUMBER"},
159            "start": {"NUMBER", "'-'"},
160            "foo": {"'foo'"},
161            "bar": {"'bar'"},
162            "baz": {"'baz'"},
163        })
164
165    def test_advance_left_recursion(self) -> None:
166        grammar = """
167        start: NUMBER | sign start
168        sign: ['-']
169        """
170        self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
171
172    def test_mutual_left_recursion(self) -> None:
173        grammar = """
174        start: foo 'E'
175        foo: bar 'A' | 'B'
176        bar: foo 'C' | 'D'
177        """
178        self.assertEqual(self.calculate_first_sets(grammar), {
179            "foo": {"'D'", "'B'"},
180            "bar": {"'D'"},
181            "start": {"'D'", "'B'"},
182        })
183
184    def test_nasty_left_recursion(self) -> None:
185        # TODO: Validate this
186        grammar = """
187        start: target '='
188        target: maybe '+' | NAME
189        maybe: maybe '-' | target
190        """
191        self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
192
193    def test_nullable_rule(self) -> None:
194        grammar = """
195        start: sign thing $
196        sign: ['-']
197        thing: NUMBER
198        """
199        self.assertEqual(self.calculate_first_sets(grammar), {
200            "sign": {"", "'-'"},
201            "thing": {"NUMBER"},
202            "start": {"NUMBER", "'-'"},
203        })
204
205    def test_epsilon_production_in_start_rule(self) -> None:
206        grammar = """
207        start: ['-'] $
208        """
209        self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
210
211    def test_multiple_nullable_rules(self) -> None:
212        grammar = """
213        start: sign thing other another $
214        sign: ['-']
215        thing: ['+']
216        other: '*'
217        another: '/'
218        """
219        self.assertEqual(self.calculate_first_sets(grammar), {
220            "sign": {"", "'-'"},
221            "thing": {"'+'", ""},
222            "start": {"'+'", "'-'", "'*'"},
223            "other": {"'*'"},
224            "another": {"'/'"},
225        })
226