1import unittest 2 3from test import test_tools 4from typing import Dict, Set 5 6test_tools.skip_if_missing('peg_generator') 7with test_tools.imports_under_tool('peg_generator'): 8 from pegen.grammar_parser import GeneratedParser as GrammarParser 9 from pegen.testutil import parse_string 10 from pegen.first_sets import FirstSetCalculator 11 from pegen.grammar import Grammar 12 13 14class TestFirstSets(unittest.TestCase): 15 def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]: 16 grammar: Grammar = parse_string(grammar_source, GrammarParser) 17 return FirstSetCalculator(grammar.rules).calculate() 18 19 def test_alternatives(self) -> None: 20 grammar = """ 21 start: expr NEWLINE? ENDMARKER 22 expr: A | B 23 A: 'a' | '-' 24 B: 'b' | '+' 25 """ 26 self.assertEqual(self.calculate_first_sets(grammar), { 27 "A": {"'a'", "'-'"}, 28 "B": {"'+'", "'b'"}, 29 "expr": {"'+'", "'a'", "'b'", "'-'"}, 30 "start": {"'+'", "'a'", "'b'", "'-'"}, 31 }) 32 33 def test_optionals(self) -> None: 34 grammar = """ 35 start: expr NEWLINE 36 expr: ['a'] ['b'] 'c' 37 """ 38 self.assertEqual(self.calculate_first_sets(grammar), { 39 "expr": {"'c'", "'a'", "'b'"}, 40 "start": {"'c'", "'a'", "'b'"}, 41 }) 42 43 def test_repeat_with_separator(self) -> None: 44 grammar = """ 45 start: ','.thing+ NEWLINE 46 thing: NUMBER 47 """ 48 self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) 49 50 def test_optional_operator(self) -> None: 51 grammar = """ 52 start: sum NEWLINE 53 sum: (term)? 'b' 54 term: NUMBER 55 """ 56 self.assertEqual(self.calculate_first_sets(grammar), { 57 "term": {"NUMBER"}, 58 "sum": {"NUMBER", "'b'"}, 59 "start": {"'b'", "NUMBER"}, 60 }) 61 62 def test_optional_literal(self) -> None: 63 grammar = """ 64 start: sum NEWLINE 65 sum: '+' ? term 66 term: NUMBER 67 """ 68 self.assertEqual(self.calculate_first_sets(grammar), { 69 "term": {"NUMBER"}, 70 "sum": {"'+'", "NUMBER"}, 71 "start": {"'+'", "NUMBER"}, 72 }) 73 74 def test_optional_after(self) -> None: 75 grammar = """ 76 start: term NEWLINE 77 term: NUMBER ['+'] 78 """ 79 self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}}) 80 81 def test_optional_before(self) -> None: 82 grammar = """ 83 start: term NEWLINE 84 term: ['+'] NUMBER 85 """ 86 self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}}) 87 88 def test_repeat_0(self) -> None: 89 grammar = """ 90 start: thing* "+" NEWLINE 91 thing: NUMBER 92 """ 93 self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}}) 94 95 def test_repeat_0_with_group(self) -> None: 96 grammar = """ 97 start: ('+' '-')* term NEWLINE 98 term: NUMBER 99 """ 100 self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}}) 101 102 def test_repeat_1(self) -> None: 103 grammar = """ 104 start: thing+ '-' NEWLINE 105 thing: NUMBER 106 """ 107 self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) 108 109 def test_repeat_1_with_group(self) -> None: 110 grammar = """ 111 start: ('+' term)+ term NEWLINE 112 term: NUMBER 113 """ 114 self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}) 115 116 def test_gather(self) -> None: 117 grammar = """ 118 start: ','.thing+ NEWLINE 119 thing: NUMBER 120 """ 121 self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) 122 123 def test_positive_lookahead(self) -> None: 124 grammar = """ 125 start: expr NEWLINE 126 expr: &'a' opt 127 opt: 'a' | 'b' | 'c' 128 """ 129 self.assertEqual(self.calculate_first_sets(grammar), { 130 "expr": {"'a'"}, 131 "start": {"'a'"}, 132 "opt": {"'b'", "'c'", "'a'"}, 133 }) 134 135 def test_negative_lookahead(self) -> None: 136 grammar = """ 137 start: expr NEWLINE 138 expr: !'a' opt 139 opt: 'a' | 'b' | 'c' 140 """ 141 self.assertEqual(self.calculate_first_sets(grammar), { 142 "opt": {"'b'", "'a'", "'c'"}, 143 "expr": {"'b'", "'c'"}, 144 "start": {"'b'", "'c'"}, 145 }) 146 147 def test_left_recursion(self) -> None: 148 grammar = """ 149 start: expr NEWLINE 150 expr: ('-' term | expr '+' term | term) 151 term: NUMBER 152 foo: 'foo' 153 bar: 'bar' 154 baz: 'baz' 155 """ 156 self.assertEqual(self.calculate_first_sets(grammar), { 157 "expr": {"NUMBER", "'-'"}, 158 "term": {"NUMBER"}, 159 "start": {"NUMBER", "'-'"}, 160 "foo": {"'foo'"}, 161 "bar": {"'bar'"}, 162 "baz": {"'baz'"}, 163 }) 164 165 def test_advance_left_recursion(self) -> None: 166 grammar = """ 167 start: NUMBER | sign start 168 sign: ['-'] 169 """ 170 self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}) 171 172 def test_mutual_left_recursion(self) -> None: 173 grammar = """ 174 start: foo 'E' 175 foo: bar 'A' | 'B' 176 bar: foo 'C' | 'D' 177 """ 178 self.assertEqual(self.calculate_first_sets(grammar), { 179 "foo": {"'D'", "'B'"}, 180 "bar": {"'D'"}, 181 "start": {"'D'", "'B'"}, 182 }) 183 184 def test_nasty_left_recursion(self) -> None: 185 # TODO: Validate this 186 grammar = """ 187 start: target '=' 188 target: maybe '+' | NAME 189 maybe: maybe '-' | target 190 """ 191 self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}}) 192 193 def test_nullable_rule(self) -> None: 194 grammar = """ 195 start: sign thing $ 196 sign: ['-'] 197 thing: NUMBER 198 """ 199 self.assertEqual(self.calculate_first_sets(grammar), { 200 "sign": {"", "'-'"}, 201 "thing": {"NUMBER"}, 202 "start": {"NUMBER", "'-'"}, 203 }) 204 205 def test_epsilon_production_in_start_rule(self) -> None: 206 grammar = """ 207 start: ['-'] $ 208 """ 209 self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}) 210 211 def test_multiple_nullable_rules(self) -> None: 212 grammar = """ 213 start: sign thing other another $ 214 sign: ['-'] 215 thing: ['+'] 216 other: '*' 217 another: '/' 218 """ 219 self.assertEqual(self.calculate_first_sets(grammar), { 220 "sign": {"", "'-'"}, 221 "thing": {"'+'", ""}, 222 "start": {"'+'", "'-'", "'*'"}, 223 "other": {"'*'"}, 224 "another": {"'/'"}, 225 }) 226