"""C code generator for PEG grammars.

This module turns a parsed ``pegen`` grammar into C source text.  It has two
cooperating pieces:

* ``CCallMakerVisitor`` maps each grammar node to a ``FunctionCall`` describing
  the C expression that parses that node.
* ``CParserGenerator`` walks the rules and prints the C functions, using the
  call maker for every item inside an alternative.
"""

import ast
import os.path
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple

from pegen import grammar
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    GrammarVisitor,
    Group,
    Leaf,
    Lookahead,
    NamedItem,
    NameLeaf,
    NegativeLookahead,
    Opt,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    StringLeaf,
)
from pegen.parser_generator import ParserGenerator

# Default text printed at the top of the generated file (overridable through
# the grammar's "header" meta).
EXTENSION_PREFIX = """\
#include "pegen.h"

#if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
#  define D(x) if (Py_DebugFlag) x;
#else
#  define D(x)
#endif

"""


# Default text printed at the bottom of the generated file (overridable through
# the grammar's "trailer" meta).  It is %-formatted with ``mode`` and
# ``modulename`` in ``CParserGenerator.generate``.
EXTENSION_SUFFIX = """
void *
_PyPegen_parse(Parser *p)
{
    // Initialize keywords
    p->keywords = reserved_keywords;
    p->n_keyword_lists = n_keyword_lists;
    p->soft_keywords = soft_keywords;

    return start_rule(p);
}
"""


class NodeTypes(Enum):
    """Classification of a ``FunctionCall``; drives lookahead helper selection."""

    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    SOFT_KEYWORD = 5
    CUT_OPERATOR = 6


# Non-exact token names that get a dedicated `_PyPegen_<name>_token` helper.
BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
}


@dataclass
class FunctionCall:
    """Description of one C call expression emitted inside an alternative.

    ``str(call)`` renders the C text: the function name, its argument list,
    an optional ``, !p->error_indicator`` suffix (``force_true``), an optional
    assignment wrapper ``(var = [(type)]...)`` and an optional trailing
    ``// comment``.
    """

    function: str
    arguments: List[Any] = field(default_factory=list)
    assigned_variable: Optional[str] = None
    assigned_variable_type: Optional[str] = None
    return_type: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    force_true: bool = False
    comment: Optional[str] = None

    def __str__(self) -> str:
        parts = []
        parts.append(self.function)
        if self.arguments:
            parts.append(f"({', '.join(map(str, self.arguments))})")
        if self.force_true:
            # Comma operator: evaluate the call, then yield "no error so far".
            parts.append(", !p->error_indicator")
        if self.assigned_variable:
            if self.assigned_variable_type:
                parts = [
                    "(",
                    self.assigned_variable,
                    " = ",
                    "(",
                    self.assigned_variable_type,
                    ")",
                    *parts,
                    ")",
                ]
            else:
                parts = ["(", self.assigned_variable, " = ", *parts, ")"]
        if self.comment:
            parts.append(f"  // {self.comment}")
        return "".join(parts)


class CCallMakerVisitor(GrammarVisitor):
    """Translate grammar nodes into ``FunctionCall`` objects.

    Calls for nodes that require an artificial helper rule (non-inlinable
    ``Rhs``, repeats, gathers) are cached per node so the helper rule is only
    requested once from the parser generator.
    """

    def __init__(
        self,
        parser_generator: ParserGenerator,
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
    ):
        self.gen = parser_generator
        self.exact_tokens = exact_tokens
        self.non_exact_tokens = non_exact_tokens
        self.cache: Dict[Any, FunctionCall] = {}

    def keyword_helper(self, keyword: str) -> FunctionCall:
        """Return the call expecting the hard keyword *keyword*."""
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_token",
            arguments=["p", self.gen.keywords[keyword]],
            return_type="Token *",
            nodetype=NodeTypes.KEYWORD,
            comment=f"token='{keyword}'",
        )

    def soft_keyword_helper(self, value: str) -> FunctionCall:
        """Return the call expecting a soft keyword.

        *value* is passed through verbatim as the C argument, so it is
        expected to already be a quoted literal.
        """
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_soft_keyword",
            arguments=["p", value],
            return_type="expr_ty",
            nodetype=NodeTypes.SOFT_KEYWORD,
            comment=f"soft_keyword='{value}'",
        )

    def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
        """Return the call for a bare name: a token or a rule reference."""
        name = node.value
        if name in self.non_exact_tokens:
            if name in BASE_NODETYPES:
                return FunctionCall(
                    assigned_variable=f"{name.lower()}_var",
                    function=f"_PyPegen_{name.lower()}_token",
                    arguments=["p"],
                    nodetype=BASE_NODETYPES[name],
                    return_type="expr_ty",
                    comment=name,
                )
            return FunctionCall(
                assigned_variable=f"{name.lower()}_var",
                function="_PyPegen_expect_token",
                arguments=["p", name],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{name}'",
            )

        # Otherwise the name refers to another rule; its C return type is only
        # known when the rule is registered with the generator.
        return_type = None
        rule = self.gen.all_rules.get(name.lower())
        if rule is not None:
            return_type = "asdl_seq *" if rule.is_loop() or rule.is_gather() else rule.type

        return FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type=return_type,
            comment=f"{node}",
        )

    def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
        """Return the call for a quoted literal: keyword, soft keyword or token."""
        val = ast.literal_eval(node.value)
        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
            # Single-quoted literals are hard keywords, anything else is soft.
            if node.value.endswith("'"):
                return self.keyword_helper(val)
            else:
                return self.soft_keyword_helper(node.value)
        else:
            assert val in self.exact_tokens, f"{node.value} is not a known literal"
            token_type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function="_PyPegen_expect_token",
                arguments=["p", token_type],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{val}'",
            )

    def visit_Rhs(self, node: Rhs) -> FunctionCall:
        """Return the (cached) call for a nested right-hand side."""
        if node in self.cache:
            return self.cache[node]
        if node.can_be_inlined:
            self.cache[node] = self.generate_call(node.alts[0].items[0])
        else:
            name = self.gen.artifical_rule_from_rhs(node)
            self.cache[node] = FunctionCall(
                assigned_variable=f"{name}_var",
                function=f"{name}_rule",
                arguments=["p"],
                comment=f"{node}",
            )
        return self.cache[node]

    def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
        """Return the item's call, overriding variable name/type if given."""
        call = self.generate_call(node.item)
        if node.name:
            call.assigned_variable = node.name
        if node.type:
            call.assigned_variable_type = node.type
        return call

    def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
        """Wrap the call for ``node.node`` in the matching lookahead helper.

        *positive* is emitted as the first C argument (1 for ``&``, 0 for
        ``!``).  The helper function is chosen from the wrapped call's
        ``nodetype``.
        """
        call = self.generate_call(node.node)
        if call.nodetype == NodeTypes.NAME_TOKEN:
            return FunctionCall(
                function="_PyPegen_lookahead_with_name",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype == NodeTypes.SOFT_KEYWORD:
            return FunctionCall(
                function="_PyPegen_lookahead_with_string",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
            return FunctionCall(
                function="_PyPegen_lookahead_with_int",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
                comment=f"token={node.node}",
            )
        else:
            return FunctionCall(
                function="_PyPegen_lookahead",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )

    def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
        return self.lookahead_call_helper(node, 1)

    def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
        return self.lookahead_call_helper(node, 0)

    def visit_Forced(self, node: Forced) -> FunctionCall:
        """Return the call for a forced (``&&``) item.

        Only leaf literals and groups are supported.

        Raises:
            NotImplementedError: for any other kind of wrapped node.
        """
        call = self.generate_call(node.node)
        if isinstance(node.node, Leaf):
            assert isinstance(node.node, Leaf)
            val = ast.literal_eval(node.node.value)
            assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
            token_type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function="_PyPegen_expect_forced_token",
                arguments=["p", token_type, f'"{val}"'],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"forced_token='{val}'",
            )
        if isinstance(node.node, Group):
            call = self.visit(node.node.rhs)
            # The inner call is embedded as an argument, so it must render as
            # a bare expression without assignment or trailing comment.
            call.assigned_variable = None
            call.comment = None
            return FunctionCall(
                assigned_variable="_literal",
                function="_PyPegen_expect_forced_result",
                arguments=["p", str(call), f'"{node.node.rhs!s}"'],
                return_type="void *",
                comment=f"forced_token=({node.node.rhs!s})",
            )
        else:
            raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes")

    def visit_Opt(self, node: Opt) -> FunctionCall:
        """Return the call for an optional item; it always evaluates truthy
        unless the parser's error indicator is set (``force_true``)."""
        call = self.generate_call(node.node)
        return FunctionCall(
            assigned_variable="_opt_var",
            function=call.function,
            arguments=call.arguments,
            force_true=True,
            comment=f"{node}",
        )

    def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, False)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, True)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Gather(self, node: Gather) -> FunctionCall:
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artifical_rule_from_gather(node)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Group(self, node: Group) -> FunctionCall:
        return self.generate_call(node.rhs)

    def visit_Cut(self, node: Cut) -> FunctionCall:
        # Rendered as `(_cut_var = 1)`: a condition that always succeeds and
        # records that the cut was reached.
        return FunctionCall(
            assigned_variable="_cut_var",
            return_type="int",
            function="1",
            nodetype=NodeTypes.CUT_OPERATOR,
        )

    def generate_call(self, node: Any) -> FunctionCall:
        """Dispatch *node* to the matching ``visit_*`` method."""
        return super().visit(node)


class CParserGenerator(ParserGenerator, GrammarVisitor):
    """Print the C parser for a grammar, one C function per rule."""

    def __init__(
        self,
        grammar: grammar.Grammar,
        tokens: Dict[int, str],
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
        file: Optional[IO[Text]],
        debug: bool = False,
        skip_actions: bool = False,
    ):
        super().__init__(grammar, set(tokens.values()), file)
        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
            self, exact_tokens, non_exact_tokens
        )
        self._varname_counter = 0
        self.debug = debug
        self.skip_actions = skip_actions

    def add_level(self) -> None:
        self.print("D(p->level++);")

    def remove_level(self) -> None:
        self.print("D(p->level--);")

    def add_return(self, ret_val: str) -> None:
        """Emit a C ``return`` preceded by the debug level decrement."""
        self.remove_level()
        self.print(f"return {ret_val};")

    def unique_varname(self, name: str = "tmpvar") -> str:
        """Return ``<name>_<counter>`` and advance the counter."""
        new_var = name + "_" + str(self._varname_counter)
        self._varname_counter += 1
        return new_var

    def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
        """Emit *call_text* and return *returnval* when it yields non-zero."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.add_return(returnval)
        self.print("}")

    def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
        """Emit *call_text* and jump to *goto_target* when it yields non-zero."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.print(f"goto {goto_target};")
        self.print("}")

    def out_of_memory_return(
        self,
        expr: str,
        cleanup_code: Optional[str] = None,
    ) -> None:
        """Emit an ``if (expr)`` block that reports out-of-memory and returns NULL.

        *cleanup_code*, when given, is printed first inside the block.
        """
        self.print(f"if ({expr}) {{")
        with self.indent():
            if cleanup_code is not None:
                self.print(cleanup_code)
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
            self.add_return("NULL")
        self.print("}")

    def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
        """Emit an ``if (expr)`` block that reports out-of-memory and jumps."""
        self.print(f"if ({expr}) {{")
        with self.indent():
            self.print("PyErr_NoMemory();")
            self.print(f"goto {goto_target};")
        self.print("}")

    def generate(self, filename: str) -> None:
        """Print the whole C file for the grammar read from *filename*.

        Output order: banner, header/subheader, keyword tables, rule type
        ``#define``s, forward declarations, rule functions, trailer.
        """
        self.collect_rules()
        basename = os.path.basename(filename)
        self.print(f"// @generated by pegen from {basename}")
        header = self.grammar.metas.get("header", EXTENSION_PREFIX)
        if header:
            self.print(header.rstrip("\n"))
        subheader = self.grammar.metas.get("subheader", "")
        if subheader:
            self.print(subheader)
        self._setup_keywords()
        self._setup_soft_keywords()
        for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000):
            comment = "  // Left-recursive" if rule.left_recursive else ""
            self.print(f"#define {rulename}_type {i}{comment}")
        self.print()
        for rulename, rule in self.all_rules.items():
            if rule.is_loop() or rule.is_gather():
                rule_type = "asdl_seq *"
            elif rule.type:
                rule_type = rule.type + " "
            else:
                rule_type = "void *"
            self.print(f"static {rule_type}{rulename}_rule(Parser *p);")
        self.print()
        # Iterate over a snapshot: the mapping is copied before visiting the
        # rules (presumably because visiting can register artificial rules).
        for rulename, rule in list(self.all_rules.items()):
            self.print()
            if rule.left_recursive:
                self.print("// Left-recursive")
            self.visit(rule)
        if self.skip_actions:
            mode = 0
        else:
            mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
            if mode == 1 and self.grammar.metas.get("bytecode"):
                mode += 1
        modulename = self.grammar.metas.get("modulename", "parse")
        trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
        if trailer:
            self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))

    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
        """Bucket ``(keyword, type)`` pairs by keyword length."""
        groups: Dict[int, List[Tuple[str, int]]] = {}
        for keyword_str, keyword_type in self.keywords.items():
            groups.setdefault(len(keyword_str), []).append((keyword_str, keyword_type))
        return groups

    def _setup_keywords(self) -> None:
        """Print ``n_keyword_lists`` and the ``reserved_keywords`` table.

        The table is indexed by keyword length; lengths with no keyword get a
        sentinel-only entry.
        """
        n_keyword_lists = (
            len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0
        )
        self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
        groups = self._group_keywords_by_length()
        self.print("static KeywordToken *reserved_keywords[] = {")
        with self.indent():
            num_groups = max(groups) + 1 if groups else 1
            for keywords_length in range(num_groups):
                if keywords_length not in groups:
                    self.print("(KeywordToken[]) {{NULL, -1}},")
                else:
                    self.print("(KeywordToken[]) {")
                    with self.indent():
                        for keyword_str, keyword_type in groups[keywords_length]:
                            self.print(f'{{"{keyword_str}", {keyword_type}}},')
                        self.print("{NULL, -1},")
                    self.print("},")
        self.print("};")

    def _setup_soft_keywords(self) -> None:
        """Print the NULL-terminated, sorted ``soft_keywords`` table."""
        soft_keywords = sorted(self.soft_keywords)
        self.print("static char *soft_keywords[] = {")
        with self.indent():
            for keyword in soft_keywords:
                self.print(f'"{keyword}",')
            self.print("NULL,")
        self.print("};")

    def _set_up_token_start_metadata_extraction(self) -> None:
        """Emit the ``_start_lineno`` / ``_start_col_offset`` locals."""
        self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.add_return("NULL")
        self.print("}")
        self.print("int _start_lineno = p->tokens[_mark]->lineno;")
        self.print("UNUSED(_start_lineno); // Only used by EXTRA macro")
        self.print("int _start_col_offset = p->tokens[_mark]->col_offset;")
        self.print("UNUSED(_start_col_offset); // Only used by EXTRA macro")

    def _set_up_token_end_metadata_extraction(self) -> None:
        """Emit the ``_end_lineno`` / ``_end_col_offset`` locals."""
        self.print("Token *_token = _PyPegen_get_last_nonnwhitespace_token(p);")
        self.print("if (_token == NULL) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")
        self.print("int _end_lineno = _token->end_lineno;")
        self.print("UNUSED(_end_lineno); // Only used by EXTRA macro")
        self.print("int _end_col_offset = _token->end_col_offset;")
        self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")

    def _check_for_errors(self) -> None:
        """Emit an early ``return NULL`` when the error indicator is set."""
        self.print("if (p->error_indicator) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")

    def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
        """Emit the body of a left-recursive leader's ``<name>_rule`` wrapper.

        The wrapper repeatedly calls ``<name>_raw`` while each call ends at a
        later mark than the previous one, updating the memo each round.  After
        the wrapper body, the signature of ``<name>_raw`` is printed so that
        the caller can emit the raw body next.
        """
        self.print("{")
        with self.indent():
            self.add_level()
            self.print(f"{result_type} _res = NULL;")
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
            with self.indent():
                self.add_return("_res")
            self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _resmark = p->mark;")
            self.print("while (1) {")
            with self.indent():
                self.call_with_errorcheck_return(
                    f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
                )
                self.print("p->mark = _mark;")
                self.print("p->in_raw_rule++;")
                self.print(f"void *_raw = {node.name}_raw(p);")
                self.print("p->in_raw_rule--;")
                # Go through add_return so the debug nesting level is
                # decremented on this exit path like on every other one.
                self.print("if (p->error_indicator) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
                self.print("if (_raw == NULL || p->mark <= _resmark)")
                with self.indent():
                    self.print("break;")
                self.print("_resmark = p->mark;")
                self.print("_res = _raw;")
            self.print("}")
            self.print("p->mark = _resmark;")
            self.add_return("_res")
        self.print("}")
        self.print(f"static {result_type}")
        self.print(f"{node.name}_raw(Parser *p)")

    def _should_memoize(self, node: Rule) -> bool:
        return node.memo and not node.left_recursive

    def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
        """Emit the body of a non-loop rule function (without the braces)."""
        memoize = self._should_memoize(node)

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print(f"{result_type} _res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=False,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if self.debug:
                self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
            self.print("_res = NULL;")
        self.print("  done:")
        with self.indent():
            if memoize:
                self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);")
            self.add_return("_res")

    def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
        """Emit the body of a loop rule function (without the braces).

        Results are collected in a growable ``_children`` buffer and copied
        into an ``asdl_seq`` at the end.
        """
        memoize = self._should_memoize(node)
        is_repeat1 = node.name.startswith("_loop1")

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print("void *_res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _start_mark = p->mark;")
            self.print("void **_children = PyMem_Malloc(sizeof(void *));")
            self.out_of_memory_return("!_children")
            self.print("Py_ssize_t _children_capacity = 1;")
            self.print("Py_ssize_t _n = 0;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=True,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if is_repeat1:
                self.print("if (_n == 0 || p->error_indicator) {")
                with self.indent():
                    self.print("PyMem_Free(_children);")
                    self.add_return("NULL")
                self.print("}")
            self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
            self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
            self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
            self.print("PyMem_Free(_children);")
            if node.name:
                self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);")
            self.add_return("_seq")

    def visit_Rule(self, node: Rule) -> None:
        """Print the C function (or wrapper + raw pair) for *node*."""
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        if is_loop or is_gather:
            result_type = "asdl_seq *"
        elif node.type:
            result_type = node.type
        else:
            result_type = "void *"

        for line in str(node).splitlines():
            self.print(f"// {line}")
        if node.left_recursive and node.leader:
            self.print(f"static {result_type} {node.name}_raw(Parser *);")

        self.print(f"static {result_type}")
        self.print(f"{node.name}_rule(Parser *p)")

        if node.left_recursive and node.leader:
            self._set_up_rule_memoization(node, result_type)

        self.print("{")
        if is_loop:
            self._handle_loop_rule_body(node, rhs)
        else:
            self._handle_default_rule_body(node, rhs, result_type)
        self.print("}")

    def visit_NamedItem(self, node: NamedItem) -> None:
        """Print the call for one item, de-duplicating its variable name."""
        call = self.callmakervisitor.generate_call(node)
        if call.assigned_variable:
            call.assigned_variable = self.dedupe(call.assigned_variable)
        self.print(call)

    def visit_Rhs(
        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        if is_loop:
            assert len(node.alts) == 1
        for alt in node.alts:
            self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)

    def join_conditions(self, keyword: str, node: Any) -> None:
        """Print ``<keyword> ( item && item ... )`` for the items of *node*."""
        self.print(f"{keyword} (")
        with self.indent():
            first = True
            for item in node.items:
                if first:
                    first = False
                else:
                    self.print("&&")
                self.visit(item)
        self.print(")")

    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
        """Emit ``_res = <action>;`` plus the NULL-with-exception check."""
        self.print(f"_res = {node.action};")

        self.print("if (_res == NULL && PyErr_Occurred()) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            if cleanup_code:
                self.print(cleanup_code)
            self.add_return("NULL")
        self.print("}")

        if self.debug:
            # The alternative is embedded in a C string literal, so any double
            # quote in it (e.g. a soft keyword) must be escaped.
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "Hit with action [%d-%d]: %s\\n", _mark, p->mark, "{node_str}"));'
            )

    def emit_default_action(self, is_gather: bool, node: Alt) -> None:
        """Emit the result assignment for an alternative without an action."""
        # Escaped for embedding in a C string literal in the debug output.
        node_str = str(node).replace('"', '\\"')
        if len(self.local_variable_names) > 1:
            if is_gather:
                assert len(self.local_variable_names) == 2
                self.print(
                    f"_res = _PyPegen_seq_insert_in_front(p, "
                    f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
                )
            else:
                if self.debug:
                    self.print(
                        f'D(fprintf(stderr, "Hit without action [%d:%d]: %s\\n", _mark, p->mark, "{node_str}"));'
                    )
                self.print(
                    f"_res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
                )
        else:
            if self.debug:
                self.print(
                    f'D(fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", _mark, p->mark, "{node_str}"));'
                )
            self.print(f"_res = {self.local_variable_names[0]};")

    def emit_dummy_action(self) -> None:
        self.print("_res = _PyPegen_dummy_name(p);")

    def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the ``if (...) { action; goto done; }`` block for *node*."""
        self.join_conditions(keyword="if", node=node)
        self.print("{")
        # We have parsed successfully all the conditions for the option.
        with self.indent():
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node)
            else:
                self.emit_default_action(is_gather, node)

            # As the current option has parsed correctly, do not continue with the rest.
            self.print("goto done;")
        self.print("}")

    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the ``while (...) { ... }`` block collecting loop results."""
        # Condition of the main body of the alternative
        self.join_conditions(keyword="while", node=node)
        self.print("{")
        # We have parsed successfully one item!
        with self.indent():
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node, cleanup_code="PyMem_Free(_children);")
            else:
                self.emit_default_action(is_gather, node)

            # Add the result of rule to the temporary buffer of children. This buffer
            # will populate later an asdl_seq with all elements to return.
            self.print("if (_n == _children_capacity) {")
            with self.indent():
                self.print("_children_capacity *= 2;")
                self.print(
                    "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
                )
                # A failed PyMem_Realloc leaves the old buffer allocated, so it
                # has to be released before bailing out.
                self.out_of_memory_return(
                    "!_new_children", cleanup_code="PyMem_Free(_children);"
                )
                self.print("_children = _new_children;")
            self.print("}")
            self.print("_children[_n++] = _res;")
            self.print("_mark = p->mark;")
        self.print("}")

    def visit_Alt(
        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        """Print the C block for one alternative.

        The block declares the alternative's variables, tries to match it, and
        on failure resets the mark.  If the alternative contains a cut that was
        reached, the rule returns NULL instead of trying later alternatives.
        """
        if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
            self.print(f"if (p->call_invalid_rules) {{ // {node}")
        else:
            self.print(f"{{ // {node}")
        with self.indent():
            self._check_for_errors()
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare variable declarations for the alternative
            vars = self.collect_vars(node)
            for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
                if not var_type:
                    var_type = "void *"
                else:
                    var_type += " "
                if v == "_cut_var":
                    v += " = 0"  # cut_var must be initialized
                self.print(f"{var_type}{v};")
                if v and v.startswith("_opt_var"):
                    self.print(f"UNUSED({v}); // Silence compiler warnings")

            with self.local_variable_context():
                if is_loop:
                    self.handle_alt_loop(node, is_gather, rulename)
                else:
                    self.handle_alt_normal(node, is_gather, rulename)

            self.print("p->mark = _mark;")
            node_str = str(node).replace('"', '\\"')
            self.print(
                f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
                f'                  p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
            )
            if "_cut_var" in vars:
                self.print("if (_cut_var) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
        self.print("}")

    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
        """Map each item's (de-duplicated) variable name to its C type."""
        types = {}
        with self.local_variable_context():
            for item in node.items:
                name, var_type = self.add_var(item)
                types[name] = var_type
        return types

    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(variable name, C type)`` for one item of an alternative."""
        call = self.callmakervisitor.generate_call(node.item)
        name = node.name if node.name else call.assigned_variable
        if name is not None:
            name = self.dedupe(name)
        return_type = call.return_type if node.type is None else node.type
        return name, return_type