# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2009 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

import socket
import sys
import traceback

from antlr3 import Parser, TokenStream, RecognitionException, Token
from antlr3.tree import CommonTreeAdaptor, TreeAdaptor, Tree


class DebugParser(Parser):
    """A Parser that reports debugging events to a DebugEventListener.

    The token stream is wrapped in a DebugTokenStream so that token-level
    events (consume, LT, mark, rewind, ...) reach the same listener as the
    parser-level events fired here.
    """

    def __init__(self, stream, state=None, dbg=None, *args, **kwargs):
        # Wrap token stream in DebugTokenStream (unless user already did so).
        if not isinstance(stream, DebugTokenStream):
            stream = DebugTokenStream(stream, dbg)

        super(DebugParser, self).__init__(stream, state, *args, **kwargs)

        # Who to notify when events in the parser occur.
        self._dbg = None

        self.setDebugListener(dbg)


    def setDebugListener(self, dbg):
        """
        Provide a new debug event listener for this parser. Notify the
        input stream too that it should send events to this listener.
        """

        if hasattr(self.input, 'dbg'):
            self.input.dbg = dbg

        self._dbg = dbg

    def getDebugListener(self):
        return self._dbg

    dbg = property(getDebugListener, setDebugListener)


    def beginResync(self):
        self._dbg.beginResync()


    def endResync(self):
        self._dbg.endResync()


    def beginBacktrack(self, level):
        self._dbg.beginBacktrack(level)


    def endBacktrack(self, level, successful):
        self._dbg.endBacktrack(level, successful)


    def reportError(self, exc):
        """Route recognition errors to the debug listener; dump a traceback
        for any other (internal) exception.
        """
        if isinstance(exc, RecognitionException):
            self._dbg.recognitionException(exc)

        else:
            # BUGFIX: 'traceback' was used without being imported, and
            # traceback.print_exc() takes a line-count limit as its first
            # argument, not an exception object.
            traceback.print_exc()


class DebugTokenStream(TokenStream):
    def __init__(self, input, dbg=None):
        self.input = input
        self.initialStreamState = True
        # Track the last mark() call result value for use in rewind().
95 self.lastMarker = None 96 97 self._dbg = None 98 self.setDebugListener(dbg) 99 100 # force TokenStream to get at least first valid token 101 # so we know if there are any hidden tokens first in the stream 102 self.input.LT(1) 103 104 105 def getDebugListener(self): 106 return self._dbg 107 108 def setDebugListener(self, dbg): 109 self._dbg = dbg 110 111 dbg = property(getDebugListener, setDebugListener) 112 113 114 def consume(self): 115 if self.initialStreamState: 116 self.consumeInitialHiddenTokens() 117 118 a = self.input.index() 119 t = self.input.LT(1) 120 self.input.consume() 121 b = self.input.index() 122 self._dbg.consumeToken(t) 123 124 if b > a+1: 125 # then we consumed more than one token; must be off channel tokens 126 for idx in range(a+1, b): 127 self._dbg.consumeHiddenToken(self.input.get(idx)); 128 129 130 def consumeInitialHiddenTokens(self): 131 """consume all initial off-channel tokens""" 132 133 firstOnChannelTokenIndex = self.input.index() 134 for idx in range(firstOnChannelTokenIndex): 135 self._dbg.consumeHiddenToken(self.input.get(idx)) 136 137 self.initialStreamState = False 138 139 140 def LT(self, i): 141 if self.initialStreamState: 142 self.consumeInitialHiddenTokens() 143 144 t = self.input.LT(i) 145 self._dbg.LT(i, t) 146 return t 147 148 149 def LA(self, i): 150 if self.initialStreamState: 151 self.consumeInitialHiddenTokens() 152 153 t = self.input.LT(i) 154 self._dbg.LT(i, t) 155 return t.type 156 157 158 def get(self, i): 159 return self.input.get(i) 160 161 162 def index(self): 163 return self.input.index() 164 165 166 def mark(self): 167 self.lastMarker = self.input.mark() 168 self._dbg.mark(self.lastMarker) 169 return self.lastMarker 170 171 172 def rewind(self, marker=None): 173 self._dbg.rewind(marker) 174 self.input.rewind(marker) 175 176 177 def release(self, marker): 178 pass 179 180 181 def seek(self, index): 182 # TODO: implement seek in dbg interface 183 # self._dbg.seek(index); 184 self.input.seek(index) 185 186 187 
def size(self): 188 return self.input.size() 189 190 191 def getTokenSource(self): 192 return self.input.getTokenSource() 193 194 195 def getSourceName(self): 196 return self.getTokenSource().getSourceName() 197 198 199 def toString(self, start=None, stop=None): 200 return self.input.toString(start, stop) 201 202 203class DebugTreeAdaptor(TreeAdaptor): 204 """A TreeAdaptor proxy that fires debugging events to a DebugEventListener 205 delegate and uses the TreeAdaptor delegate to do the actual work. All 206 AST events are triggered by this adaptor; no code gen changes are needed 207 in generated rules. Debugging events are triggered *after* invoking 208 tree adaptor routines. 209 210 Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})" 211 cannot be tracked as they might not use the adaptor to create foo, bar. 212 The debug listener has to deal with tree node IDs for which it did 213 not see a createNode event. A single <unknown> node is sufficient even 214 if it represents a whole tree. 
215 """ 216 217 def __init__(self, dbg, adaptor): 218 self.dbg = dbg 219 self.adaptor = adaptor 220 221 222 def createWithPayload(self, payload): 223 if payload.getTokenIndex() < 0: 224 # could be token conjured up during error recovery 225 return self.createFromType(payload.getType(), payload.getText()) 226 227 node = self.adaptor.createWithPayload(payload) 228 self.dbg.createNode(node, payload) 229 return node 230 231 def createFromToken(self, tokenType, fromToken, text=None): 232 node = self.adaptor.createFromToken(tokenType, fromToken, text) 233 self.dbg.createNode(node) 234 return node 235 236 def createFromType(self, tokenType, text): 237 node = self.adaptor.createFromType(tokenType, text) 238 self.dbg.createNode(node) 239 return node 240 241 242 def errorNode(self, input, start, stop, exc): 243 node = selfadaptor.errorNode(input, start, stop, exc) 244 if node is not None: 245 dbg.errorNode(node) 246 247 return node 248 249 250 def dupTree(self, tree): 251 t = self.adaptor.dupTree(tree) 252 # walk the tree and emit create and add child events 253 # to simulate what dupTree has done. dupTree does not call this debug 254 # adapter so I must simulate. 
255 self.simulateTreeConstruction(t) 256 return t 257 258 259 def simulateTreeConstruction(self, t): 260 """^(A B C): emit create A, create B, add child, ...""" 261 self.dbg.createNode(t) 262 for i in range(self.adaptor.getChildCount(t)): 263 child = self.adaptor.getChild(t, i) 264 self.simulateTreeConstruction(child) 265 self.dbg.addChild(t, child) 266 267 268 def dupNode(self, treeNode): 269 d = self.adaptor.dupNode(treeNode) 270 self.dbg.createNode(d) 271 return d 272 273 274 def nil(self): 275 node = self.adaptor.nil() 276 self.dbg.nilNode(node) 277 return node 278 279 280 def isNil(self, tree): 281 return self.adaptor.isNil(tree) 282 283 284 def addChild(self, t, child): 285 if isinstance(child, Token): 286 n = self.createWithPayload(child) 287 self.addChild(t, n) 288 289 else: 290 if t is None or child is None: 291 return 292 293 self.adaptor.addChild(t, child) 294 self.dbg.addChild(t, child) 295 296 def becomeRoot(self, newRoot, oldRoot): 297 if isinstance(newRoot, Token): 298 n = self.createWithPayload(newRoot) 299 self.adaptor.becomeRoot(n, oldRoot) 300 else: 301 n = self.adaptor.becomeRoot(newRoot, oldRoot) 302 303 self.dbg.becomeRoot(newRoot, oldRoot) 304 return n 305 306 307 def rulePostProcessing(self, root): 308 return self.adaptor.rulePostProcessing(root) 309 310 311 def getType(self, t): 312 return self.adaptor.getType(t) 313 314 315 def setType(self, t, type): 316 self.adaptor.setType(t, type) 317 318 319 def getText(self, t): 320 return self.adaptor.getText(t) 321 322 323 def setText(self, t, text): 324 self.adaptor.setText(t, text) 325 326 327 def getToken(self, t): 328 return self.adaptor.getToken(t) 329 330 331 def setTokenBoundaries(self, t, startToken, stopToken): 332 self.adaptor.setTokenBoundaries(t, startToken, stopToken) 333 if t is not None and startToken is not None and stopToken is not None: 334 self.dbg.setTokenBoundaries( 335 t, startToken.getTokenIndex(), 336 stopToken.getTokenIndex()) 337 338 339 def getTokenStartIndex(self, t): 
340 return self.adaptor.getTokenStartIndex(t) 341 342 343 def getTokenStopIndex(self, t): 344 return self.adaptor.getTokenStopIndex(t) 345 346 347 def getChild(self, t, i): 348 return self.adaptor.getChild(t, i) 349 350 351 def setChild(self, t, i, child): 352 self.adaptor.setChild(t, i, child) 353 354 355 def deleteChild(self, t, i): 356 return self.adaptor.deleteChild(t, i) 357 358 359 def getChildCount(self, t): 360 return self.adaptor.getChildCount(t) 361 362 363 def getUniqueID(self, node): 364 return self.adaptor.getUniqueID(node) 365 366 367 def getParent(self, t): 368 return self.adaptor.getParent(t) 369 370 371 def getChildIndex(self, t): 372 return self.adaptor.getChildIndex(t) 373 374 375 def setParent(self, t, parent): 376 self.adaptor.setParent(t, parent) 377 378 379 def setChildIndex(self, t, index): 380 self.adaptor.setChildIndex(t, index) 381 382 383 def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): 384 self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t) 385 386 387 ## support 388 389 def getDebugListener(self): 390 return dbg 391 392 def setDebugListener(self, dbg): 393 self.dbg = dbg 394 395 396 def getTreeAdaptor(self): 397 return self.adaptor 398 399 400 401class DebugEventListener(object): 402 """All debugging events that a recognizer can trigger. 403 404 I did not create a separate AST debugging interface as it would create 405 lots of extra classes and DebugParser has a dbg var defined, which makes 406 it hard to change to ASTDebugEventListener. I looked hard at this issue 407 and it is easier to understand as one monolithic event interface for all 408 possible events. Hopefully, adding ST debugging stuff won't be bad. Leave 409 for future. 4/26/2006. 410 """ 411 412 # Moved to version 2 for v3.1: added grammar name to enter/exit Rule 413 PROTOCOL_VERSION = "2" 414 415 def enterRule(self, grammarFileName, ruleName): 416 """The parser has just entered a rule. 
No decision has been made about 417 which alt is predicted. This is fired AFTER init actions have been 418 executed. Attributes are defined and available etc... 419 The grammarFileName allows composite grammars to jump around among 420 multiple grammar files. 421 """ 422 423 pass 424 425 426 def enterAlt(self, alt): 427 """Because rules can have lots of alternatives, it is very useful to 428 know which alt you are entering. This is 1..n for n alts. 429 """ 430 pass 431 432 433 def exitRule(self, grammarFileName, ruleName): 434 """This is the last thing executed before leaving a rule. It is 435 executed even if an exception is thrown. This is triggered after 436 error reporting and recovery have occurred (unless the exception is 437 not caught in this rule). This implies an "exitAlt" event. 438 The grammarFileName allows composite grammars to jump around among 439 multiple grammar files. 440 """ 441 pass 442 443 444 def enterSubRule(self, decisionNumber): 445 """Track entry into any (...) subrule other EBNF construct""" 446 pass 447 448 449 def exitSubRule(self, decisionNumber): 450 pass 451 452 453 def enterDecision(self, decisionNumber): 454 """Every decision, fixed k or arbitrary, has an enter/exit event 455 so that a GUI can easily track what LT/consume events are 456 associated with prediction. You will see a single enter/exit 457 subrule but multiple enter/exit decision events, one for each 458 loop iteration. 459 """ 460 pass 461 462 463 def exitDecision(self, decisionNumber): 464 pass 465 466 467 def consumeToken(self, t): 468 """An input token was consumed; matched by any kind of element. 469 Trigger after the token was matched by things like match(), matchAny(). 470 """ 471 pass 472 473 474 def consumeHiddenToken(self, t): 475 """An off-channel input token was consumed. 476 Trigger after the token was matched by things like match(), matchAny(). 477 (unless of course the hidden token is first stuff in the input stream). 
478 """ 479 pass 480 481 482 def LT(self, i, t): 483 """Somebody (anybody) looked ahead. Note that this actually gets 484 triggered by both LA and LT calls. The debugger will want to know 485 which Token object was examined. Like consumeToken, this indicates 486 what token was seen at that depth. A remote debugger cannot look 487 ahead into a file it doesn't have so LT events must pass the token 488 even if the info is redundant. 489 """ 490 pass 491 492 493 def mark(self, marker): 494 """The parser is going to look arbitrarily ahead; mark this location, 495 the token stream's marker is sent in case you need it. 496 """ 497 pass 498 499 500 def rewind(self, marker=None): 501 """After an arbitrairly long lookahead as with a cyclic DFA (or with 502 any backtrack), this informs the debugger that stream should be 503 rewound to the position associated with marker. 504 505 """ 506 pass 507 508 509 def beginBacktrack(self, level): 510 pass 511 512 513 def endBacktrack(self, level, successful): 514 pass 515 516 517 def location(self, line, pos): 518 """To watch a parser move through the grammar, the parser needs to 519 inform the debugger what line/charPos it is passing in the grammar. 520 For now, this does not know how to switch from one grammar to the 521 other and back for island grammars etc... 522 523 This should also allow breakpoints because the debugger can stop 524 the parser whenever it hits this line/pos. 525 """ 526 pass 527 528 529 def recognitionException(self, e): 530 """A recognition exception occurred such as NoViableAltException. I made 531 this a generic event so that I can alter the exception hierachy later 532 without having to alter all the debug objects. 533 534 Upon error, the stack of enter rule/subrule must be properly unwound. 535 If no viable alt occurs it is within an enter/exit decision, which 536 also must be rewound. Even the rewind for each mark must be unwount. 
537 In the Java target this is pretty easy using try/finally, if a bit 538 ugly in the generated code. The rewind is generated in DFA.predict() 539 actually so no code needs to be generated for that. For languages 540 w/o this "finally" feature (C++?), the target implementor will have 541 to build an event stack or something. 542 543 Across a socket for remote debugging, only the RecognitionException 544 data fields are transmitted. The token object or whatever that 545 caused the problem was the last object referenced by LT. The 546 immediately preceding LT event should hold the unexpected Token or 547 char. 548 549 Here is a sample event trace for grammar: 550 551 b : C ({;}A|B) // {;} is there to prevent A|B becoming a set 552 | D 553 ; 554 555 The sequence for this rule (with no viable alt in the subrule) for 556 input 'c c' (there are 3 tokens) is: 557 558 commence 559 LT(1) 560 enterRule b 561 location 7 1 562 enter decision 3 563 LT(1) 564 exit decision 3 565 enterAlt1 566 location 7 5 567 LT(1) 568 consumeToken [c/<4>,1:0] 569 location 7 7 570 enterSubRule 2 571 enter decision 2 572 LT(1) 573 LT(1) 574 recognitionException NoViableAltException 2 1 2 575 exit decision 2 576 exitSubRule 2 577 beginResync 578 LT(1) 579 consumeToken [c/<4>,1:1] 580 LT(1) 581 endResync 582 LT(-1) 583 exitRule b 584 terminate 585 """ 586 pass 587 588 589 def beginResync(self): 590 """Indicates the recognizer is about to consume tokens to resynchronize 591 the parser. Any consume events from here until the recovered event 592 are not part of the parse--they are dead tokens. 593 """ 594 pass 595 596 597 def endResync(self): 598 """Indicates that the recognizer has finished consuming tokens in order 599 to resychronize. There may be multiple beginResync/endResync pairs 600 before the recognizer comes out of errorRecovery mode (in which 601 multiple errors are suppressed). 
This will be useful 602 in a gui where you want to probably grey out tokens that are consumed 603 but not matched to anything in grammar. Anything between 604 a beginResync/endResync pair was tossed out by the parser. 605 """ 606 pass 607 608 609 def semanticPredicate(self, result, predicate): 610 """A semantic predicate was evaluate with this result and action text""" 611 pass 612 613 614 def commence(self): 615 """Announce that parsing has begun. Not technically useful except for 616 sending events over a socket. A GUI for example will launch a thread 617 to connect and communicate with a remote parser. The thread will want 618 to notify the GUI when a connection is made. ANTLR parsers 619 trigger this upon entry to the first rule (the ruleLevel is used to 620 figure this out). 621 """ 622 pass 623 624 625 def terminate(self): 626 """Parsing is over; successfully or not. Mostly useful for telling 627 remote debugging listeners that it's time to quit. When the rule 628 invocation level goes to zero at the end of a rule, we are done 629 parsing. 630 """ 631 pass 632 633 634 ## T r e e P a r s i n g 635 636 def consumeNode(self, t): 637 """Input for a tree parser is an AST, but we know nothing for sure 638 about a node except its type and text (obtained from the adaptor). 639 This is the analog of the consumeToken method. Again, the ID is 640 the hashCode usually of the node so it only works if hashCode is 641 not implemented. If the type is UP or DOWN, then 642 the ID is not really meaningful as it's fixed--there is 643 just one UP node and one DOWN navigation node. 644 """ 645 pass 646 647 648 def LT(self, i, t): 649 """The tree parser lookedahead. If the type is UP or DOWN, 650 then the ID is not really meaningful as it's fixed--there is 651 just one UP node and one DOWN navigation node. 652 """ 653 pass 654 655 656 657 ## A S T E v e n t s 658 659 def nilNode(self, t): 660 """A nil was created (even nil nodes have a unique ID... 661 they are not "null" per se). 
As of 4/28/2006, this 662 seems to be uniquely triggered when starting a new subtree 663 such as when entering a subrule in automatic mode and when 664 building a tree in rewrite mode. 665 666 If you are receiving this event over a socket via 667 RemoteDebugEventSocketListener then only t.ID is set. 668 """ 669 pass 670 671 672 def errorNode(self, t): 673 """Upon syntax error, recognizers bracket the error with an error node 674 if they are building ASTs. 675 """ 676 pass 677 678 679 def createNode(self, node, token=None): 680 """Announce a new node built from token elements such as type etc... 681 682 If you are receiving this event over a socket via 683 RemoteDebugEventSocketListener then only t.ID, type, text are 684 set. 685 """ 686 pass 687 688 689 def becomeRoot(self, newRoot, oldRoot): 690 """Make a node the new root of an existing root. 691 692 Note: the newRootID parameter is possibly different 693 than the TreeAdaptor.becomeRoot() newRoot parameter. 694 In our case, it will always be the result of calling 695 TreeAdaptor.becomeRoot() and not root_n or whatever. 696 697 The listener should assume that this event occurs 698 only when the current subrule (or rule) subtree is 699 being reset to newRootID. 700 701 If you are receiving this event over a socket via 702 RemoteDebugEventSocketListener then only IDs are set. 703 704 @see antlr3.tree.TreeAdaptor.becomeRoot() 705 """ 706 pass 707 708 709 def addChild(self, root, child): 710 """Make childID a child of rootID. 711 712 If you are receiving this event over a socket via 713 RemoteDebugEventSocketListener then only IDs are set. 714 715 @see antlr3.tree.TreeAdaptor.addChild() 716 """ 717 pass 718 719 720 def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): 721 """Set the token start/stop token index for a subtree root or node. 722 723 If you are receiving this event over a socket via 724 RemoteDebugEventSocketListener then only t.ID is set. 
725 """ 726 pass 727 728 729class BlankDebugEventListener(DebugEventListener): 730 """A blank listener that does nothing; useful for real classes so 731 they don't have to have lots of blank methods and are less 732 sensitive to updates to debug interface. 733 734 Note: this class is identical to DebugEventListener and exists purely 735 for compatibility with Java. 736 """ 737 pass 738 739 740class TraceDebugEventListener(DebugEventListener): 741 """A listener that simply records text representations of the events. 742 743 Useful for debugging the debugging facility ;) 744 745 Subclasses can override the record() method (which defaults to printing to 746 stdout) to record the events in a different way. 747 """ 748 749 def __init__(self, adaptor=None): 750 super(TraceDebugEventListener, self).__init__() 751 752 if adaptor is None: 753 adaptor = CommonTreeAdaptor() 754 self.adaptor = adaptor 755 756 def record(self, event): 757 sys.stdout.write(event + '\n') 758 759 def enterRule(self, grammarFileName, ruleName): 760 self.record("enterRule "+ruleName) 761 762 def exitRule(self, grammarFileName, ruleName): 763 self.record("exitRule "+ruleName) 764 765 def enterSubRule(self, decisionNumber): 766 self.record("enterSubRule") 767 768 def exitSubRule(self, decisionNumber): 769 self.record("exitSubRule") 770 771 def location(self, line, pos): 772 self.record("location %s:%s" % (line, pos)) 773 774 ## Tree parsing stuff 775 776 def consumeNode(self, t): 777 self.record("consumeNode %s %s %s" % ( 778 self.adaptor.getUniqueID(t), 779 self.adaptor.getText(t), 780 self.adaptor.getType(t))) 781 782 def LT(self, i, t): 783 self.record("LT %s %s %s %s" % ( 784 i, 785 self.adaptor.getUniqueID(t), 786 self.adaptor.getText(t), 787 self.adaptor.getType(t))) 788 789 790 ## AST stuff 791 def nilNode(self, t): 792 self.record("nilNode %s" % self.adaptor.getUniqueID(t)) 793 794 def createNode(self, t, token=None): 795 if token is None: 796 self.record("create %s: %s, %s" % ( 797 
self.adaptor.getUniqueID(t), 798 self.adaptor.getText(t), 799 self.adaptor.getType(t))) 800 801 else: 802 self.record("create %s: %s" % ( 803 self.adaptor.getUniqueID(t), 804 token.getTokenIndex())) 805 806 def becomeRoot(self, newRoot, oldRoot): 807 self.record("becomeRoot %s, %s" % ( 808 self.adaptor.getUniqueID(newRoot), 809 self.adaptor.getUniqueID(oldRoot))) 810 811 def addChild(self, root, child): 812 self.record("addChild %s, %s" % ( 813 self.adaptor.getUniqueID(root), 814 self.adaptor.getUniqueID(child))) 815 816 def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): 817 self.record("setTokenBoundaries %s, %s, %s" % ( 818 self.adaptor.getUniqueID(t), 819 tokenStartIndex, tokenStopIndex)) 820 821 822class RecordDebugEventListener(TraceDebugEventListener): 823 """A listener that records events as strings in an array.""" 824 825 def __init__(self, adaptor=None): 826 super(RecordDebugEventListener, self).__init__(adaptor) 827 828 self.events = [] 829 830 def record(self, event): 831 self.events.append(event) 832 833 834class DebugEventSocketProxy(DebugEventListener): 835 """A proxy debug event listener that forwards events over a socket to 836 a debugger (or any other listener) using a simple text-based protocol; 837 one event per line. ANTLRWorks listens on server socket with a 838 RemoteDebugEventSocketListener instance. These two objects must therefore 839 be kept in sync. New events must be handled on both sides of socket. 840 """ 841 842 DEFAULT_DEBUGGER_PORT = 49100 843 844 def __init__(self, recognizer, adaptor=None, port=None, 845 debug=None): 846 super(DebugEventSocketProxy, self).__init__() 847 848 self.grammarFileName = recognizer.getGrammarFileName() 849 850 # Almost certainly the recognizer will have adaptor set, but 851 # we don't know how to cast it (Parser or TreeParser) to get 852 # the adaptor field. Must be set with a constructor. 
:( 853 self.adaptor = adaptor 854 855 self.port = port or self.DEFAULT_DEBUGGER_PORT 856 857 self.debug = debug 858 859 self.socket = None 860 self.connection = None 861 self.input = None 862 self.output = None 863 864 865 def log(self, msg): 866 if self.debug is not None: 867 self.debug.write(msg + '\n') 868 869 870 def handshake(self): 871 if self.socket is None: 872 # create listening socket 873 self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 874 self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 875 self.socket.bind(('', self.port)) 876 self.socket.listen(1) 877 self.log("Waiting for incoming connection on port %d" % self.port) 878 879 # wait for an incoming connection 880 self.connection, addr = self.socket.accept() 881 self.log("Accepted connection from %s:%d" % addr) 882 883 self.connection.setblocking(1) 884 self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) 885 886 # FIXME(pink): wrap into utf8 encoding stream 887 self.output = self.connection.makefile('w', 0) 888 self.input = self.connection.makefile('r', 0) 889 890 self.write("ANTLR %s" % self.PROTOCOL_VERSION) 891 self.write("grammar \"%s" % self.grammarFileName) 892 self.ack() 893 894 895 def write(self, msg): 896 self.log("> %s" % msg) 897 self.output.write("%s\n" % msg) 898 self.output.flush() 899 900 901 def ack(self): 902 t = self.input.readline() 903 self.log("< %s" % t.rstrip()) 904 905 906 def transmit(self, event): 907 self.write(event); 908 self.ack(); 909 910 911 def commence(self): 912 # don't bother sending event; listener will trigger upon connection 913 pass 914 915 916 def terminate(self): 917 self.transmit("terminate") 918 self.output.close() 919 self.input.close() 920 self.connection.close() 921 self.socket.close() 922 923 924 def enterRule(self, grammarFileName, ruleName): 925 self.transmit("enterRule\t%s\t%s" % (grammarFileName, ruleName)) 926 927 928 def enterAlt(self, alt): 929 self.transmit("enterAlt\t%d" % alt) 930 931 932 def 
exitRule(self, grammarFileName, ruleName): 933 self.transmit("exitRule\t%s\t%s" % (grammarFileName, ruleName)) 934 935 936 def enterSubRule(self, decisionNumber): 937 self.transmit("enterSubRule\t%d" % decisionNumber) 938 939 940 def exitSubRule(self, decisionNumber): 941 self.transmit("exitSubRule\t%d" % decisionNumber) 942 943 944 def enterDecision(self, decisionNumber): 945 self.transmit("enterDecision\t%d" % decisionNumber) 946 947 948 def exitDecision(self, decisionNumber): 949 self.transmit("exitDecision\t%d" % decisionNumber) 950 951 952 def consumeToken(self, t): 953 self.transmit("consumeToken\t%s" % self.serializeToken(t)) 954 955 956 def consumeHiddenToken(self, t): 957 self.transmit("consumeHiddenToken\t%s" % self.serializeToken(t)) 958 959 960 def LT(self, i, o): 961 if isinstance(o, Tree): 962 return self.LT_tree(i, o) 963 return self.LT_token(i, o) 964 965 966 def LT_token(self, i, t): 967 if t is not None: 968 self.transmit("LT\t%d\t%s" % (i, self.serializeToken(t))) 969 970 971 def mark(self, i): 972 self.transmit("mark\t%d" % i) 973 974 975 def rewind(self, i=None): 976 if i is not None: 977 self.transmit("rewind\t%d" % i) 978 else: 979 self.transmit("rewind") 980 981 982 def beginBacktrack(self, level): 983 self.transmit("beginBacktrack\t%d" % level) 984 985 986 def endBacktrack(self, level, successful): 987 self.transmit("endBacktrack\t%d\t%s" % ( 988 level, ['0', '1'][bool(successful)])) 989 990 991 def location(self, line, pos): 992 self.transmit("location\t%d\t%d" % (line, pos)) 993 994 995 def recognitionException(self, exc): 996 self.transmit('\t'.join([ 997 "exception", 998 exc.__class__.__name__, 999 str(int(exc.index)), 1000 str(int(exc.line)), 1001 str(int(exc.charPositionInLine))])) 1002 1003 1004 def beginResync(self): 1005 self.transmit("beginResync") 1006 1007 1008 def endResync(self): 1009 self.transmit("endResync") 1010 1011 1012 def semanticPredicate(self, result, predicate): 1013 self.transmit('\t'.join([ 1014 
"semanticPredicate", 1015 str(int(result)), 1016 self.escapeNewlines(predicate)])) 1017 1018 ## A S T P a r s i n g E v e n t s 1019 1020 def consumeNode(self, t): 1021 FIXME(31) 1022# StringBuffer buf = new StringBuffer(50); 1023# buf.append("consumeNode"); 1024# serializeNode(buf, t); 1025# transmit(buf.toString()); 1026 1027 1028 def LT_tree(self, i, t): 1029 FIXME(34) 1030# int ID = adaptor.getUniqueID(t); 1031# String text = adaptor.getText(t); 1032# int type = adaptor.getType(t); 1033# StringBuffer buf = new StringBuffer(50); 1034# buf.append("LN\t"); // lookahead node; distinguish from LT in protocol 1035# buf.append(i); 1036# serializeNode(buf, t); 1037# transmit(buf.toString()); 1038 1039 1040 def serializeNode(self, buf, t): 1041 FIXME(33) 1042# int ID = adaptor.getUniqueID(t); 1043# String text = adaptor.getText(t); 1044# int type = adaptor.getType(t); 1045# buf.append("\t"); 1046# buf.append(ID); 1047# buf.append("\t"); 1048# buf.append(type); 1049# Token token = adaptor.getToken(t); 1050# int line = -1; 1051# int pos = -1; 1052# if ( token!=null ) { 1053# line = token.getLine(); 1054# pos = token.getCharPositionInLine(); 1055# } 1056# buf.append("\t"); 1057# buf.append(line); 1058# buf.append("\t"); 1059# buf.append(pos); 1060# int tokenIndex = adaptor.getTokenStartIndex(t); 1061# buf.append("\t"); 1062# buf.append(tokenIndex); 1063# serializeText(buf, text); 1064 1065 1066 ## A S T E v e n t s 1067 1068 def nilNode(self, t): 1069 self.transmit("nilNode\t%d" % self.adaptor.getUniqueID(t)) 1070 1071 1072 def errorNode(self, t): 1073 self.transmit("errorNode\t%d\t%d\t\"%s" % ( 1074 self.adaptor.getUniqueID(t), 1075 Token.INVALID_TOKEN_TYPE, 1076 self.escapeNewlines(t.toString()))) 1077 1078 1079 1080 def createNode(self, node, token=None): 1081 if token is not None: 1082 self.transmit("createNode\t%d\t%d" % ( 1083 self.adaptor.getUniqueID(node), 1084 token.getTokenIndex())) 1085 1086 else: 1087 self.transmit("createNodeFromTokenElements\t%d\t%d\t\"%s" % 
( 1088 self.adaptor.getUniqueID(node), 1089 self.adaptor.getType(node), 1090 self.adaptor.getText(node))) 1091 1092 1093 def becomeRoot(self, newRoot, oldRoot): 1094 self.transmit("becomeRoot\t%d\t%d" % ( 1095 self.adaptor.getUniqueID(newRoot), 1096 self.adaptor.getUniqueID(oldRoot))) 1097 1098 1099 def addChild(self, root, child): 1100 self.transmit("addChild\t%d\t%d" % ( 1101 self.adaptor.getUniqueID(root), 1102 self.adaptor.getUniqueID(child))) 1103 1104 1105 def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): 1106 self.transmit("setTokenBoundaries\t%d\t%d\t%d" % ( 1107 self.adaptor.getUniqueID(t), 1108 tokenStartIndex, tokenStopIndex)) 1109 1110 1111 1112 ## support 1113 1114 def setTreeAdaptor(self, adaptor): 1115 self.adaptor = adaptor 1116 1117 def getTreeAdaptor(self): 1118 return self.adaptor 1119 1120 1121 def serializeToken(self, t): 1122 buf = [str(int(t.getTokenIndex())), 1123 str(int(t.getType())), 1124 str(int(t.getChannel())), 1125 str(int(t.getLine() or 0)), 1126 str(int(t.getCharPositionInLine() or 0)), 1127 '\"' + self.escapeNewlines(t.getText())] 1128 return '\t'.join(buf) 1129 1130 1131 def escapeNewlines(self, txt): 1132 if txt is None: 1133 return '' 1134 1135 txt = txt.replace("%","%25") # escape all escape char ;) 1136 txt = txt.replace("\n","%0A") # escape \n 1137 txt = txt.replace("\r","%0D") # escape \r 1138 return txt 1139