## This file is part of PyANTLR. See LICENSE.txt for license
## details..........Copyright (C) Wolfgang Haefelinger, 2004.

## get sys module
import sys

## NOTE: this name is rebound to the function version() further down.
version = sys.version.split()[0]
## Interpreters older than 2.2.1 / 2.3 lack the boolean names, so they
## are supplied here. The names are stored through globals() instead
## of being assigned directly: a literal "False = 0" is rejected at
## compile time by interpreters on which True/False are keywords,
## which would make the whole module unimportable there.
if version < '2.2.1':
    globals()['False'] = 0
if version < '2.3':
    globals()['True'] = not False

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                     global symbols                             ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

### ANTLR Standard Tokens
SKIP                = -1
INVALID_TYPE        = 0
EOF_TYPE            = 1
EOF                 = 1
NULL_TREE_LOOKAHEAD = 3
MIN_USER_TYPE       = 4

### ANTLR's EOF Symbol
EOF_CHAR = ''

## Types accepted by is_string_type(). 'unicode' only exists on
## Python 2, so fall back to plain 'str' when it is missing.
try:
    _STRING_TYPES = (str, unicode)
except NameError:
    _STRING_TYPES = (str,)

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                     general functions                          ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

## Version should be automatically derived from configure.in. For now,
## we need to bump it ourselves. Don't remove the <version> tags.
## <version>
def version():
    """Return the release numbers of this runtime as a dict of strings."""
    r = {
        'major'  : '2',
        'minor'  : '7',
        'micro'  : '5',
        'patch'  : '' ,
        'version': '2.7.5'
        }
    return r
## </version>

def error(fmt,*args):
    """Print 'error:  <fmt % args>' to stdout; do nothing if fmt is empty."""
    if fmt:
        ## two blanks after the colon reproduce what the former
        ## statement 'print "error: ", msg' wrote.
        print("error:  %s" % (fmt % tuple(args),))

def ifelse(cond,_then,_else):
    """Return _then if cond is true, otherwise _else (both are evaluated)."""
    if cond :
        return _then
    return _else

def is_string_type(x):
    """Tell whether x is a byte string or (where available) unicode string."""
    return isinstance(x, _STRING_TYPES)

def assert_string_type(x):
    """Assert that x is a string in the sense of is_string_type()."""
    assert is_string_type(x)

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                     ANTLR Exceptions                           ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class ANTLRException(Exception):
    """Root of all exceptions defined in this module."""

    def __init__(self, *args):
        Exception.__init__(self, *args)


class RecognitionException(ANTLRException):
    """Exception carrying an optional file name, line and column.

    Positional arguments: (message, fileName, line, column); every
    argument after the first is optional. Missing line/column are -1.
    """

    def __init__(self, *args):
        ANTLRException.__init__(self, *args)
        self.fileName = None
        self.line = -1
        self.column = -1
        if len(args) >= 2:
            self.fileName = args[1]
        if len(args) >= 3:
            self.line = args[2]
        if len(args) >= 4:
            self.column = args[3]

    def __str__(self):
        """Return the location prefix 'file:line:col: ' (message not included)."""
        buf = ['']
        if self.fileName:
            buf.append(self.fileName + ":")
        if self.line != -1:
            if not self.fileName:
                buf.append("line ")
            buf.append(str(self.line))
            if self.column != -1:
                buf.append(":" + str(self.column))
            buf.append(":")
        buf.append(" ")
        return str('').join(buf)

    __repr__ = __str__


class NoViableAltException(RecognitionException):
    """Raised with the offending Token or AST node as first argument.

    AST and ASTNULL are not defined in this part of the file; they are
    expected to be module-level names defined further down.
    """

    def __init__(self, *args):
        RecognitionException.__init__(self, *args)
        self.token = None
        self.node = None
        ## An empty argument list falls through to the TypeError below
        ## instead of failing with an IndexError on args[0].
        culprit = None
        if args:
            culprit = args[0]
        if isinstance(culprit,AST):
            self.node = culprit
        elif isinstance(culprit,Token):
            self.token = culprit
        else:
            raise TypeError("NoViableAltException requires Token or AST argument")

    def __str__(self):
        if self.token:
            line = self.token.getLine()
            col  = self.token.getColumn()
            text = self.token.getText()
            return "unexpected symbol at line %s (column %s): \"%s\"" % (line,col,text)
        if self.node == ASTNULL:
            return "unexpected end of subtree"
        assert self.node
        ### hackish, we assume that an AST contains method getText
        return "unexpected node: %s" % (self.node.getText())

    __repr__ = __str__


class NoViableAltForCharException(RecognitionException):
    """Raised by a scanner that found a char no alternative starts with.

    Accepted argument lists: (foundChar, scanner) or
    (foundChar, fileName, line, column). Anything else yields an
    exception without position and without a found char.
    """

    def __init__(self, *args):
        self.foundChar = None
        if len(args) == 2:
            self.foundChar = args[0]
            scanner = args[1]
            RecognitionException.__init__(self, "NoViableAlt",
                                          scanner.getFilename(),
                                          scanner.getLine(),
                                          scanner.getColumn())
        elif len(args) == 4:
            self.foundChar = args[0]
            fileName = args[1]
            line = args[2]
            column = args[3]
            RecognitionException.__init__(self, "NoViableAlt",
                                          fileName, line, column)
        else:
            RecognitionException.__init__(self, "NoViableAlt",
                                          '', -1, -1)

    def __str__(self):
        mesg = "unexpected char: "
        ## Test for "nothing found" first: comparing None with a string
        ## is not defined on every interpreter.
        if not self.foundChar:
            mesg += "<None>"
        elif self.foundChar >= ' ' and self.foundChar <= '~':
            mesg += "'" + self.foundChar + "'"
        else:
            mesg += "0x" + hex(ord(self.foundChar)).upper()[2:]
        return mesg

    __repr__ = __str__


class SemanticException(RecognitionException):

    def __init__(self, *args):
        RecognitionException.__init__(self, *args)


class MismatchedCharException(RecognitionException):
    """Raised by a scanner when the current char is not the expected one.

    Accepted argument lists:
      (foundChar, lower, upper, matchNot, scanner)  -- char range
      (foundChar, expectedChar, matchNot, scanner)  -- single char
      (foundChar, bitset, matchNot, scanner)        -- char set
    Any other list gives mismatch type NONE. The scanner has to offer
    getFilename(), getLine() and getColumn(). BitSet is not defined in
    this part of the file.
    """

    NONE      = 0
    CHAR      = 1
    NOT_CHAR  = 2
    RANGE     = 3
    NOT_RANGE = 4
    SET       = 5
    NOT_SET   = 6

    def __init__(self, *args):
        self.args = args
        if len(args) == 5:
            # Expected range / not range
            if args[3]:
                self.mismatchType = MismatchedCharException.NOT_RANGE
            else:
                self.mismatchType = MismatchedCharException.RANGE
            self.foundChar = args[0]
            self.expecting = args[1]
            self.upper = args[2]
            self.scanner = args[4]
            RecognitionException.__init__(self, "Mismatched char range",
                                          self.scanner.getFilename(),
                                          self.scanner.getLine(),
                                          self.scanner.getColumn())
        elif len(args) == 4 and is_string_type(args[1]):
            # Expected char / not char
            if args[2]:
                self.mismatchType = MismatchedCharException.NOT_CHAR
            else:
                self.mismatchType = MismatchedCharException.CHAR
            self.foundChar = args[0]
            self.expecting = args[1]
            self.scanner = args[3]
            RecognitionException.__init__(self, "Mismatched char",
                                          self.scanner.getFilename(),
                                          self.scanner.getLine(),
                                          self.scanner.getColumn())
        elif len(args) == 4 and isinstance(args[1], BitSet):
            # Expected BitSet / not BitSet
            if args[2]:
                self.mismatchType = MismatchedCharException.NOT_SET
            else:
                self.mismatchType = MismatchedCharException.SET
            self.foundChar = args[0]
            self.set = args[1]
            self.scanner = args[3]
            RecognitionException.__init__(self, "Mismatched char set",
                                          self.scanner.getFilename(),
                                          self.scanner.getLine(),
                                          self.scanner.getColumn())
        else:
            self.mismatchType = MismatchedCharException.NONE
            RecognitionException.__init__(self, "Mismatched char")

    ## Append a char to the msg buffer. If special,
    #  then show escaped version
    #
    def appendCharName(self, sb, c):
        if not c or c == 65535:
            # 65535 = (char) -1 = EOF
            sb.append("'<EOF>'")
        elif c == '\n':
            sb.append("'\\n'")
        elif c == '\r':
            sb.append("'\\r'")
        elif c == '\t':
            sb.append("'\\t'")
        else:
            sb.append('\'' + c + '\'')

    ##
    # Returns an error message with line number/column information
    #
    def __str__(self):
        sb = ['']
        sb.append(RecognitionException.__str__(self))

        if self.mismatchType == MismatchedCharException.CHAR:
            sb.append("expecting ")
            self.appendCharName(sb, self.expecting)
            sb.append(", found ")
            self.appendCharName(sb, self.foundChar)
        elif self.mismatchType == MismatchedCharException.NOT_CHAR:
            sb.append("expecting anything but '")
            self.appendCharName(sb, self.expecting)
            sb.append("'; got it anyway")
        elif self.mismatchType in [MismatchedCharException.RANGE, MismatchedCharException.NOT_RANGE]:
            sb.append("expecting char ")
            if self.mismatchType == MismatchedCharException.NOT_RANGE:
                sb.append("NOT ")
            sb.append("in range: ")
            ## appendCharName is a method; the former unqualified calls
            ## raised NameError whenever a range mismatch was printed.
            self.appendCharName(sb, self.expecting)
            sb.append("..")
            self.appendCharName(sb, self.upper)
            sb.append(", found ")
            self.appendCharName(sb, self.foundChar)
        elif self.mismatchType in [MismatchedCharException.SET, MismatchedCharException.NOT_SET]:
            sb.append("expecting ")
            if self.mismatchType == MismatchedCharException.NOT_SET:
                sb.append("NOT ")
            sb.append("one of (")
            ## NOTE(review): relies on the set supporting len() and
            ## integer indexing -- confirm against BitSet.
            for i in range(len(self.set)):
                self.appendCharName(sb, self.set[i])
            sb.append("), found ")
            self.appendCharName(sb, self.foundChar)

        return str().join(sb).strip()

    __repr__ = __str__


class MismatchedTokenException(RecognitionException):
    """Raised by a parser when the current token/node is not the expected one.

    Accepted argument lists (args[1] is a Token, an AST or anything else):
      (tokenNames, tokenOrNode, lower, upper, matchNot, fileName) -- range
      (tokenNames, tokenOrNode, expectedType, matchNot)           -- one type
      (tokenNames, tokenOrNode, bitset, matchNot)                 -- type set
    Any other list gives mismatch type NONE. AST and BitSet are not
    defined in this part of the file.
    """

    NONE      = 0
    TOKEN     = 1
    NOT_TOKEN = 2
    RANGE     = 3
    NOT_RANGE = 4
    SET       = 5
    NOT_SET   = 6

    def __init__(self, *args):
        self.args = args
        self.tokenNames = []
        self.token = None
        self.tokenText = ''
        self.node =  None
        ## Must exist before it is handed to RecognitionException below;
        ## only the 6-argument form supplies a real file name.
        self.fileName = None
        if len(args) == 6:
            # Expected range / not range
            ## args[3] is the upper bound of the range (stored below), so
            ## the negation flag is the following argument.
            ## NOTE(review): presumably mirrors the Java runtime order
            ## (tokenNames, token, lower, upper, matchNot, fileName).
            if args[4]:
                self.mismatchType = MismatchedTokenException.NOT_RANGE
            else:
                self.mismatchType = MismatchedTokenException.RANGE
            self.tokenNames = args[0]
            self.expecting = args[2]
            self.upper = args[3]
            self.fileName = args[5]

        elif len(args) == 4 and isinstance(args[2], int):
            # Expected token / not token
            if args[3]:
                self.mismatchType = MismatchedTokenException.NOT_TOKEN
            else:
                self.mismatchType = MismatchedTokenException.TOKEN
            self.tokenNames = args[0]
            self.expecting = args[2]

        elif len(args) == 4 and isinstance(args[2], BitSet):
            # Expected BitSet / not BitSet
            if args[3]:
                self.mismatchType = MismatchedTokenException.NOT_SET
            else:
                self.mismatchType = MismatchedTokenException.SET
            self.tokenNames = args[0]
            self.set = args[2]

        else:
            self.mismatchType = MismatchedTokenException.NONE
            RecognitionException.__init__(self, "Mismatched Token: expecting any AST node", "<AST>", -1, -1)

        if len(args) >= 2:
            if isinstance(args[1],Token):
                self.token = args[1]
                self.tokenText = self.token.getText()
                RecognitionException.__init__(self, "Mismatched Token",
                                              self.fileName,
                                              self.token.getLine(),
                                              self.token.getColumn())
            elif isinstance(args[1],AST):
                self.node = args[1]
                self.tokenText = str(self.node)
                RecognitionException.__init__(self, "Mismatched Token",
                                              "<AST>",
                                              self.node.getLine(),
                                              self.node.getColumn())
            else:
                self.tokenText = "<empty tree>"
                RecognitionException.__init__(self, "Mismatched Token",
                                              "<AST>", -1, -1)

    def appendTokenName(self, sb, tokenType):
        """Append the name of tokenType (or '<number>' if unknown) to sb."""
        if tokenType == INVALID_TYPE:
            sb.append("<Set of tokens>")
        elif tokenType < 0 or tokenType >= len(self.tokenNames):
            sb.append("<" + str(tokenType) + ">")
        else:
            sb.append(self.tokenNames[tokenType])

    ##
    # Returns an error message with line number/column information
    #
    def __str__(self):
        sb = ['']
        sb.append(RecognitionException.__str__(self))

        if self.mismatchType == MismatchedTokenException.TOKEN:
            sb.append("expecting ")
            self.appendTokenName(sb, self.expecting)
            sb.append(", found " + self.tokenText)
        elif self.mismatchType == MismatchedTokenException.NOT_TOKEN:
            sb.append("expecting anything but '")
            self.appendTokenName(sb, self.expecting)
            sb.append("'; got it anyway")
        elif self.mismatchType in [MismatchedTokenException.RANGE, MismatchedTokenException.NOT_RANGE]:
            sb.append("expecting token ")
            if self.mismatchType == MismatchedTokenException.NOT_RANGE:
                sb.append("NOT ")
            sb.append("in range: ")
            ## appendTokenName is a method; the former unqualified calls
            ## raised NameError whenever a range mismatch was printed.
            self.appendTokenName(sb, self.expecting)
            sb.append("..")
            self.appendTokenName(sb, self.upper)
            sb.append(", found " + self.tokenText)
        elif self.mismatchType in [MismatchedTokenException.SET, MismatchedTokenException.NOT_SET]:
            sb.append("expecting ")
            if self.mismatchType == MismatchedTokenException.NOT_SET:
                sb.append("NOT ")
            sb.append("one of (")
            ## NOTE(review): relies on the set supporting len() and
            ## integer indexing -- confirm against BitSet.
            for i in range(len(self.set)):
                self.appendTokenName(sb, self.set[i])
            sb.append("), found " + self.tokenText)

        return str().join(sb).strip()

    __repr__ = __str__


class TokenStreamException(ANTLRException):

    def __init__(self, *args):
        ANTLRException.__init__(self, *args)


# Wraps an Exception in a TokenStreamException
class TokenStreamIOException(TokenStreamException):

    def __init__(self, *args):
        if args and isinstance(args[0], Exception):
            io = args[0]
            TokenStreamException.__init__(self, str(io))
            self.io = io
        else:
            TokenStreamException.__init__(self, *args)
            ## nothing was wrapped: the exception stands for itself
            self.io = self


# Wraps a RecognitionException in a TokenStreamException
class TokenStreamRecognitionException(TokenStreamException):

    def __init__(self, *args):
        if args and isinstance(args[0], RecognitionException):
            recog = args[0]
            TokenStreamException.__init__(self, str(recog))
            self.recog = recog
        else:
            raise TypeError("TokenStreamRecognitionException requires RecognitionException argument")

    def __str__(self):
        return str(self.recog)

    __repr__ = __str__


class TokenStreamRetryException(TokenStreamException):

    def __init__(self, *args):
        TokenStreamException.__init__(self, *args)


class CharStreamException(ANTLRException):

    def __init__(self, *args):
        ANTLRException.__init__(self, *args)


# Wraps an Exception in a CharStreamException
class CharStreamIOException(CharStreamException):

    def __init__(self, *args):
        if args and isinstance(args[0], Exception):
            io = args[0]
            CharStreamException.__init__(self, str(io))
            self.io = io
        else:
            CharStreamException.__init__(self, *args)
            ## nothing was wrapped: the exception stands for itself
            self.io = self


class TryAgain(Exception):
    pass


###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       Token                                    ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class Token(object):
    """Minimal token: stores a type, ignores position and text updates.

    Keyword arguments: type (default INVALID_TYPE) and text (default
    "<no text>"). The text is stored, but getText() of this base class
    always answers "<no text>"; all setters return self so that calls
    can be chained.
    """

    SKIP                = -1
    INVALID_TYPE        = 0
    EOF_TYPE            = 1
    EOF                 = 1
    NULL_TREE_LOOKAHEAD = 3
    MIN_USER_TYPE       = 4

    def __init__(self,**argv):
        self.type = argv.get('type', INVALID_TYPE)
        self.text = argv.get('text', "<no text>")

    def isEOF(self):
        return (self.type == EOF_TYPE)

    def getColumn(self):
        return 0

    def getLine(self):
        return 0

    def getFilename(self):
        return None

    def setFilename(self,name):
        return self

    def getText(self):
        return "<no text>"

    def setText(self,text):
        ## the argument is validated only; this base class keeps no text
        if not is_string_type(text):
            raise TypeError("Token.setText requires string argument")
        return self

    def setColumn(self,column):
        return self

    def setLine(self,line):
        return self

    def getType(self):
        return self.type

    def setType(self,type):
        if isinstance(type,int):
            self.type = type
        else:
            raise TypeError("Token.setType requires integer argument")
        return self

    def _typeName(self):
        ## Symbolic name for the predefined token types, the raw type
        ## value for everything else. Shared by all toString() variants.
        type_ = self.type
        if type_ == 3:
            return 'NULL_TREE_LOOKAHEAD'
        if type_ == 1:
            return 'EOF_TYPE'
        if type_ == 0:
            return 'INVALID_TYPE'
        if type_ == -1:
            return 'SKIP'
        return type_

    def toString(self):
        return '["%s",<%s>]' % (self.getText(),self._typeName())

    __str__ = toString
    __repr__ = toString

### static attribute ..
Token.badToken = Token( type=INVALID_TYPE, text="<no text>")

if __name__ == "__main__":
    print("testing ..")
    T = Token.badToken
    print(T)

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       CommonToken                              ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class CommonToken(Token):
    """Token that really stores its text, line and column.

    Keyword arguments: type, text (see Token), line and col (both
    default to 0).
    """

    def __init__(self,**argv):
        Token.__init__(self,**argv)
        self.line = argv.get('line', 0)
        self.col  = argv.get('col', 0)

    def getLine(self):
        return self.line

    def getText(self):
        return self.text

    def getColumn(self):
        return self.col

    def setLine(self,line):
        self.line = line
        return self

    def setText(self,text):
        self.text = text
        return self

    def setColumn(self,col):
        self.col = col
        return self

    def toString(self):
        d = {
           'text' : self.text,
           'type' : self._typeName(),
           'line' : self.line,
           'colm' : self.col
           }

        fmt = '["%(text)s",<%(type)s>,line=%(line)s,col=%(colm)s]'
        return fmt % d

    __str__ = toString
    __repr__ = toString


if __name__ == '__main__' :
    T = CommonToken()
    print(T)
    T = CommonToken(col=15,line=1,text="some text", type=5)
    print(T)
    T = CommonToken()
    T.setLine(1).setColumn(15).setText("some text").setType(5)
    print(T)
    print(T.getLine())
    print(T.getColumn())
    print(T.getText())
    print(T.getType())

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                    CommonHiddenStreamToken                     ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class CommonHiddenStreamToken(CommonToken):
    """CommonToken with links to the hidden tokens before and after it."""

    def __init__(self,*args,**argv):
        ## CommonToken only understands keyword arguments (type, text,
        ## line, col); forward them so they can be given here as well.
        CommonToken.__init__(self,*args,**argv)
        self.hiddenBefore = None
        self.hiddenAfter  = None

    def getHiddenAfter(self):
        return self.hiddenAfter

    def getHiddenBefore(self):
        return self.hiddenBefore

    def setHiddenAfter(self,t):
        self.hiddenAfter = t

    def setHiddenBefore(self, t):
        self.hiddenBefore = t

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       Queue                                    ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

## Shall be a circular buffer on tokens ..
class Queue(object):
    """Plain FIFO on top of a list, with random read access by index."""

    def __init__(self):
        self.buffer = [] # empty list

    def append(self,item):
        self.buffer.append(item)

    def elementAt(self,index):
        return self.buffer[index]

    def reset(self):
        self.buffer = []

    def removeFirst(self):
        self.buffer.pop(0)

    def length(self):
        return len(self.buffer)

    def __str__(self):
        return str(self.buffer)

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       InputBuffer                              ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class InputBuffer(object):
    """Lookahead buffer with mark/rewind support; fill() is abstract.

    consume() only counts; the queue is adjusted lazily by
    syncConsume(). While at least one marker is set, consumed items
    stay in the queue and markerOffset moves past them instead.
    """

    def __init__(self):
        self.nMarkers = 0
        self.markerOffset = 0
        self.numToConsume = 0
        self.queue = Queue()

    def __str__(self):
        return "(%s,%s,%s,%s)" % (
           self.nMarkers,
           self.markerOffset,
           self.numToConsume,
           self.queue)

    def __repr__(self):
        return str(self)

    def commit(self):
        self.nMarkers -= 1

    def consume(self) :
        self.numToConsume += 1

    ## probably better to return a list of items
    ## because of unicode. Or return a unicode
    ## string ..
    def getLAChars(self) :
        """Return the queued items from markerOffset to the end as a string."""
        ## (the former while-loop never advanced its index and so
        ## never terminated on a non-empty range)
        elementAt = self.queue.elementAt
        first = self.markerOffset
        last = self.queue.length()
        return ''.join([elementAt(i) for i in range(first, last)])

    ## probably better to return a list of items
    ## because of unicode chars
    def getMarkedChars(self) :
        """Return the queued items in front of markerOffset as a string."""
        elementAt = self.queue.elementAt
        return ''.join([elementAt(i) for i in range(0, self.markerOffset)])

    def isMarked(self) :
        return self.nMarkers != 0

    def fill(self,k):
        ### abstract method
        raise NotImplementedError()

    def LA(self,k) :
        """Return the k-th lookahead item (k counts from 1)."""
        self.fill(k)
        return self.queue.elementAt(self.markerOffset + k - 1)

    def mark(self) :
        self.syncConsume()
        self.nMarkers += 1
        return self.markerOffset

    def rewind(self,mark) :
        self.syncConsume()
        self.markerOffset = mark
        self.nMarkers -= 1

    def reset(self) :
        self.nMarkers = 0
        self.markerOffset = 0
        self.numToConsume = 0
        self.queue.reset()

    def syncConsume(self) :
        while self.numToConsume > 0:
            if self.nMarkers > 0:
                # guess mode -- leave leading characters and bump offset.
                self.markerOffset += 1
            else:
                # normal mode -- remove first character
                self.queue.removeFirst()
            self.numToConsume -= 1

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       CharBuffer                               ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class CharBuffer(InputBuffer):
    """InputBuffer that pulls single characters from a reader object."""

    def __init__(self,reader):
        ##assert isinstance(reader,file)
        super(CharBuffer,self).__init__()
        ## a reader is supposed to be anything that has
        ## a method 'read(int)'.
        self.input = reader

    def __str__(self):
        base = super(CharBuffer,self).__str__()
        ## show this buffer's reader (not the builtin 'input') and
        ## close the brace
        return "CharBuffer{%s,%s}" % (base,str(self.input))

    def fill(self,amount):
        """Read until 'amount' items are available behind markerOffset.

        Any exception raised on the way is re-raised wrapped in a
        CharStreamIOException.
        """
        try:
            self.syncConsume()
            while self.queue.length() < (amount + self.markerOffset) :
                ## retrieve just one char - what happend at end
                ## of input?
                c = self.input.read(1)
                ### python's behaviour is to return the empty string on
                ### EOF, ie. no exception whatsoever is thrown. An empty
                ### python string has the nice feature that it is of
                ### type 'str' and "not ''" would return true. Contrary,
                ### one can't do this: '' in 'abc'. This should return
                ### false, but all we get is then a TypeError as an
                ### empty string is not a character.

                ### Let's assure then that we have either seen a
                ### character or an empty string (EOF).
                assert len(c) == 0 or len(c) == 1

                ### And it shall be of type string (ASCII or UNICODE).
                assert is_string_type(c)

                ### Just append EOF char to buffer. Note that buffer may
                ### contain then just more than one EOF char ..

                ### use unicode chars instead of ASCII ..
                self.queue.append(c)
        except Exception:
            ## fetch the exception via sys.exc_info(): that spelling is
            ## accepted by every interpreter version
            import sys
            raise CharStreamIOException(sys.exc_info()[1])
        ##except: # (mk) Cannot happen ...
            ##error ("unexpected exception caught ..")
            ##assert 0

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       LexerSharedInputState                    ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class LexerSharedInputState(object):
    """Input buffer plus line/column bookkeeping of a scanner."""

    def __init__(self,ibuf):
        assert isinstance(ibuf,InputBuffer)
        self.input = ibuf
        self.column = 1
        self.line = 1
        self.tokenStartColumn = 1
        self.tokenStartLine = 1
        self.guessing = 0
        self.filename = None

    def reset(self):
        self.column = 1
        self.line = 1
        self.tokenStartColumn = 1
        self.tokenStartLine = 1
        self.guessing = 0
        self.filename = None
        self.input.reset()

    def LA(self,k):
        return self.input.LA(k)

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                    TokenStream                                 ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class TokenStream(object):
    """Base of everything offering nextToken(); iterable over its tokens."""

    def nextToken(self):
        pass

    def __iter__(self):
        return TokenStreamIterator(self)

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                    TokenStreamIterator                         ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class TokenStreamIterator(object):
    """Iterator over a TokenStream; stops at an EOF token or a false item."""

    def __init__(self,inst):
        if isinstance(inst,TokenStream):
            self.inst = inst
            return
        raise TypeError("TokenStreamIterator requires TokenStream object")

    def __iter__(self):
        ## an iterator is its own iterable
        return self

    def next(self):
        assert self.inst
        item = self.inst.nextToken()
        if not item or item.isEOF():
            raise StopIteration()
        return item

    ## spelling of next() used by newer interpreters
    __next__ = next

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                    TokenStreamSelector                         ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class TokenStreamSelector(TokenStream):
    """TokenStream delegating to one of several registered streams."""

    def __init__(self):
        self._input = None
        self._stmap = {}
        self._stack = []

    def addInputStream(self,stream,key):
        self._stmap[key] = stream

    def getCurrentStream(self):
        return self._input

    def getStream(self,sname):
        """Return the stream registered as sname; ValueError if unknown."""
        try:
            return self._stmap[sname]
        except (KeyError, TypeError):
            ## TypeError: sname cannot be used as a dictionary key
            raise ValueError("TokenStream %s not found" % (sname,))

    def nextToken(self):
        while 1:
            try:
                return self._input.nextToken()
            except TokenStreamRetryException:
                ### just retry "forever"
                pass

    def pop(self):
        stream = self._stack.pop()
        self.select(stream)
        return stream

    def push(self,arg):
        self._stack.append(self._input)
        self.select(arg)

    def retry(self):
        raise TokenStreamRetryException()

    def select(self,arg):
        """Make arg (a TokenStream or the key of a registered one) current."""
        if isinstance(arg,TokenStream):
            self._input = arg
            return
        if is_string_type(arg):
            self._input = self.getStream(arg)
            return
        raise TypeError("TokenStreamSelector.select requires " +
                        "TokenStream or string argument")

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                      TokenStreamBasicFilter                    ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class TokenStreamBasicFilter(TokenStream):
    """TokenStream dropping all tokens whose type is in discardMask.

    BitSet is not defined in this part of the file; add() and member()
    are the only operations used on it here.
    """

    def __init__(self,input):

        self.input = input
        self.discardMask = BitSet()

    def discard(self,arg):
        """Discard one more token type (int) or install a whole BitSet."""
        if isinstance(arg,int):
            self.discardMask.add(arg)
            return
        if isinstance(arg,BitSet):
            ## was assigned to a misspelled attribute ('discardMark')
            ## and therefore had no effect on filtering
            self.discardMask = arg
            return
        raise TypeError("TokenStreamBasicFilter.discard requires " +
                        "integer or BitSet argument")

    def nextToken(self):
        tok = self.input.nextToken()
        while tok and self.discardMask.member(tok.getType()):
            tok = self.input.nextToken()
        return tok

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                      TokenStreamHiddenTokenFilter              ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class TokenStreamHiddenTokenFilter(TokenStreamBasicFilter):
    """Filter chaining "hidden" tokens to the monitored tokens around them.

    Tokens whose type is in hideMask are not returned by nextToken();
    they are linked with setHiddenBefore()/setHiddenAfter() instead, so
    the underlying tokens have to offer these methods. Tokens in
    discardMask are dropped without being linked.
    """

    def __init__(self,input):
        TokenStreamBasicFilter.__init__(self,input)
        self.hideMask = BitSet()
        self.nextMonitoredToken = None
        self.lastHiddenToken = None
        self.firstHidden = None

    def consume(self):
        self.nextMonitoredToken = self.input.nextToken()

    def consumeFirst(self):
        """Read up to the first monitored token, chaining leading hidden ones."""
        self.consume()

        p = None
        while self.hideMask.member(self.LA(1).getType()) or \
              self.discardMask.member(self.LA(1).getType()):
            if self.hideMask.member(self.LA(1).getType()):
                if not p:
                    p = self.LA(1)
                else:
                    p.setHiddenAfter(self.LA(1))
                    self.LA(1).setHiddenBefore(p)
                    p = self.LA(1)
                self.lastHiddenToken = p
                if not self.firstHidden:
                    self.firstHidden = p
            self.consume()

    def getDiscardMask(self):
        return self.discardMask

    def getHiddenAfter(self,t):
        return t.getHiddenAfter()

    def getHiddenBefore(self,t):
        return t.getHiddenBefore()

    def getHideMask(self):
        return self.hideMask

    def getInitialHiddenToken(self):
        return self.firstHidden

    def hide(self,m):
        """Hide one more token type (int) or install a whole BitSet."""
        if isinstance(m,int):
            self.hideMask.add(m)
            return
        ## was 'isinstance(m.BitMask)', which could only raise
        if isinstance(m,BitSet):
            self.hideMask = m
            return
        ## same policy as TokenStreamBasicFilter.discard
        raise TypeError("TokenStreamHiddenTokenFilter.hide requires " +
                        "integer or BitSet argument")

    def LA(self,i):
        ## one token of lookahead only; 'i' is not looked at
        return self.nextMonitoredToken

    def nextToken(self):
        """Return the next monitored token with its hidden neighbours linked."""
        if not self.LA(1):
            self.consumeFirst()

        monitored = self.LA(1)

        monitored.setHiddenBefore(self.lastHiddenToken)
        self.lastHiddenToken = None

        self.consume()
        p = monitored

        while self.hideMask.member(self.LA(1).getType()) or \
              self.discardMask.member(self.LA(1).getType()):
            if self.hideMask.member(self.LA(1).getType()):
                p.setHiddenAfter(self.LA(1))
                if p != monitored:
                    self.LA(1).setHiddenBefore(p)
                p = self.lastHiddenToken = self.LA(1)
            self.consume()
        return monitored

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       StringBuffer                             ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class StringBuffer:
    """Growable text buffer kept as a list of items.

    Note that str() of a buffer is the str() of that list, not the
    joined text; use getString() for the text.
    """

    def __init__(self,string=None):
        if string:
            self.text = list(string)
        else:
            self.text = []

    def setLength(self,sz):
        """Truncate the buffer to sz items; it is never made longer."""
        if not sz :
            self.text = []
            return
        assert sz>0
        if sz >= self.length():
            return
        ### keep the first 'sz' items only
        self.text = self.text[0:sz]

    def length(self):
        return len(self.text)

    def append(self,c):
        self.text.append(c)

    ### return buffer as string. Arg 'a' is used as index
    ## into the buffer and 2nd argument shall be the length.
    ## If 2nd args is absent, we return chars till end of
    ## buffer starting with 'a'.
    def getString(self,a=None,length=None):
        if not a :
            a = 0
        assert a>=0
        if a>= len(self.text) :
            return ""

        if length is None:
            ## no second argument (an explicit length of 0 is a real
            ## length and yields the empty string)
            L = self.text[a:]
        else:
            assert (a+length) <= len(self.text)
            b = a + length
            L = self.text[a:b]
        return "".join(L)

    toString = getString ## alias

    def __str__(self):
        return str(self.text)

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       Reader                                   ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

## When reading Japanese chars, it happens that a stream returns a
## 'char' of length 2. This looks like a bug in the appropriate
## codecs - but I'm rather unsure about this. Anyway, if this is
## the case, I'm going to split this string into a list of chars
## and put them on hold, ie. on a buffer. Next time when called
## we read from buffer until buffer is empty.
## wh: nov, 25th -> problem does not appear in Python 2.4.0.c1.

class Reader(object):
    """Wrapper guaranteeing that read(1) hands out at most one item."""

    def __init__(self,stream):
        self.cin = stream
        ## items on hold, stored in reverse order so that pop()
        ## hands them out in reading order
        self.buf = []

    def read(self,num):
        assert num==1

        if len(self.buf):
            return self.buf.pop()

        ## Read a char - this may return a string.
        ## Is this a bug in codecs/Python?
        c = self.cin.read(1)

        if not c or len(c)==1:
            return c

        L = list(c)
        L.reverse()
        self.buf.extend(L)

        ## read one char ..
        return self.buf.pop()

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                       CharScanner                              ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class CharScanner(TokenStream):
    ## class members
    NO_CHAR = 0
    EOF_CHAR = ''  ### EOF shall be the empty string.
1139 1140 def __init__(self, *argv, **kwargs): 1141 super(CharScanner, self).__init__() 1142 self.saveConsumedInput = True 1143 self.tokenClass = None 1144 self.caseSensitive = True 1145 self.caseSensitiveLiterals = True 1146 self.literals = None 1147 self.tabsize = 8 1148 self._returnToken = None 1149 self.commitToPath = False 1150 self.traceDepth = 0 1151 self.text = StringBuffer() 1152 self.hashString = hash(self) 1153 self.setTokenObjectClass(CommonToken) 1154 self.setInput(*argv) 1155 1156 def __iter__(self): 1157 return CharScannerIterator(self) 1158 1159 def setInput(self,*argv): 1160 ## case 1: 1161 ## if there's no arg we default to read from 1162 ## standard input 1163 if not argv: 1164 import sys 1165 self.setInput(sys.stdin) 1166 return 1167 1168 ## get 1st argument 1169 arg1 = argv[0] 1170 1171 ## case 2: 1172 ## if arg1 is a string, we assume it's a file name 1173 ## and open a stream using 2nd argument as open 1174 ## mode. If there's no 2nd argument we fall back to 1175 ## mode '+rb'. 1176 if is_string_type(arg1): 1177 f = open(arg1,"rb") 1178 self.setInput(f) 1179 self.setFilename(arg1) 1180 return 1181 1182 ## case 3: 1183 ## if arg1 is a file we wrap it by a char buffer ( 1184 ## some additional checks?? No, can't do this in 1185 ## general). 1186 if isinstance(arg1,file): 1187 self.setInput(CharBuffer(arg1)) 1188 return 1189 1190 ## case 4: 1191 ## if arg1 is of type SharedLexerInputState we use 1192 ## argument as is. 1193 if isinstance(arg1,LexerSharedInputState): 1194 self.inputState = arg1 1195 return 1196 1197 ## case 5: 1198 ## check whether argument type is of type input 1199 ## buffer. If so create a SharedLexerInputState and 1200 ## go ahead. 1201 if isinstance(arg1,InputBuffer): 1202 self.setInput(LexerSharedInputState(arg1)) 1203 return 1204 1205 ## case 6: 1206 ## check whether argument type has a method read(int) 1207 ## If so create CharBuffer ... 
1208 try: 1209 if arg1.read: 1210 rd = Reader(arg1) 1211 cb = CharBuffer(rd) 1212 ss = LexerSharedInputState(cb) 1213 self.inputState = ss 1214 return 1215 except: 1216 pass 1217 1218 ## case 7: 1219 ## raise wrong argument exception 1220 raise TypeError(argv) 1221 1222 def setTabSize(self,size) : 1223 self.tabsize = size 1224 1225 def getTabSize(self) : 1226 return self.tabsize 1227 1228 def setCaseSensitive(self,t) : 1229 self.caseSensitive = t 1230 1231 def setCommitToPath(self,commit) : 1232 self.commitToPath = commit 1233 1234 def setFilename(self,f) : 1235 self.inputState.filename = f 1236 1237 def setLine(self,line) : 1238 self.inputState.line = line 1239 1240 def setText(self,s) : 1241 self.resetText() 1242 self.text.append(s) 1243 1244 def getCaseSensitive(self) : 1245 return self.caseSensitive 1246 1247 def getCaseSensitiveLiterals(self) : 1248 return self.caseSensitiveLiterals 1249 1250 def getColumn(self) : 1251 return self.inputState.column 1252 1253 def setColumn(self,c) : 1254 self.inputState.column = c 1255 1256 def getCommitToPath(self) : 1257 return self.commitToPath 1258 1259 def getFilename(self) : 1260 return self.inputState.filename 1261 1262 def getInputBuffer(self) : 1263 return self.inputState.input 1264 1265 def getInputState(self) : 1266 return self.inputState 1267 1268 def setInputState(self,state) : 1269 assert isinstance(state,LexerSharedInputState) 1270 self.inputState = state 1271 1272 def getLine(self) : 1273 return self.inputState.line 1274 1275 def getText(self) : 1276 return str(self.text) 1277 1278 def getTokenObject(self) : 1279 return self._returnToken 1280 1281 def LA(self,i) : 1282 c = self.inputState.input.LA(i) 1283 if not self.caseSensitive: 1284 ### E0006 1285 c = c.__class__.lower(c) 1286 return c 1287 1288 def makeToken(self,type) : 1289 try: 1290 ## dynamically load a class 1291 assert self.tokenClass 1292 tok = self.tokenClass() 1293 tok.setType(type) 1294 tok.setColumn(self.inputState.tokenStartColumn) 1295 
tok.setLine(self.inputState.tokenStartLine) 1296 return tok 1297 except: 1298 self.panic("unable to create new token") 1299 return Token.badToken 1300 1301 def mark(self) : 1302 return self.inputState.input.mark() 1303 1304 def _match_bitset(self,b) : 1305 if b.member(self.LA(1)): 1306 self.consume() 1307 else: 1308 raise MismatchedCharException(self.LA(1), b, False, self) 1309 1310 def _match_string(self,s) : 1311 for c in s: 1312 if self.LA(1) == c: 1313 self.consume() 1314 else: 1315 raise MismatchedCharException(self.LA(1), c, False, self) 1316 1317 def match(self,item): 1318 if is_string_type(item): 1319 return self._match_string(item) 1320 else: 1321 return self._match_bitset(item) 1322 1323 def matchNot(self,c) : 1324 if self.LA(1) != c: 1325 self.consume() 1326 else: 1327 raise MismatchedCharException(self.LA(1), c, True, self) 1328 1329 def matchRange(self,c1,c2) : 1330 if self.LA(1) < c1 or self.LA(1) > c2 : 1331 raise MismatchedCharException(self.LA(1), c1, c2, False, self) 1332 else: 1333 self.consume() 1334 1335 def newline(self) : 1336 self.inputState.line += 1 1337 self.inputState.column = 1 1338 1339 def tab(self) : 1340 c = self.getColumn() 1341 nc = ( ((c-1)/self.tabsize) + 1) * self.tabsize + 1 1342 self.setColumn(nc) 1343 1344 def panic(self,s='') : 1345 print "CharScanner: panic: " + s 1346 sys.exit(1) 1347 1348 def reportError(self,ex) : 1349 print ex 1350 1351 def reportError(self,s) : 1352 if not self.getFilename(): 1353 print "error: " + str(s) 1354 else: 1355 print self.getFilename() + ": error: " + str(s) 1356 1357 def reportWarning(self,s) : 1358 if not self.getFilename(): 1359 print "warning: " + str(s) 1360 else: 1361 print self.getFilename() + ": warning: " + str(s) 1362 1363 def resetText(self) : 1364 self.text.setLength(0) 1365 self.inputState.tokenStartColumn = self.inputState.column 1366 self.inputState.tokenStartLine = self.inputState.line 1367 1368 def rewind(self,pos) : 1369 self.inputState.input.rewind(pos) 1370 1371 def 
setTokenObjectClass(self,cl): 1372 self.tokenClass = cl 1373 1374 def testForLiteral(self,token): 1375 if not token: 1376 return 1377 assert isinstance(token,Token) 1378 1379 _type = token.getType() 1380 1381 ## special tokens can't be literals 1382 if _type in [SKIP,INVALID_TYPE,EOF_TYPE,NULL_TREE_LOOKAHEAD] : 1383 return 1384 1385 _text = token.getText() 1386 if not _text: 1387 return 1388 1389 assert is_string_type(_text) 1390 _type = self.testLiteralsTable(_text,_type) 1391 token.setType(_type) 1392 return _type 1393 1394 def testLiteralsTable(self,*args): 1395 if is_string_type(args[0]): 1396 s = args[0] 1397 i = args[1] 1398 else: 1399 s = self.text.getString() 1400 i = args[0] 1401 1402 ## check whether integer has been given 1403 if not isinstance(i,int): 1404 assert isinstance(i,int) 1405 1406 ## check whether we have a dict 1407 assert isinstance(self.literals,dict) 1408 try: 1409 ## E0010 1410 if not self.caseSensitiveLiterals: 1411 s = s.__class__.lower(s) 1412 i = self.literals[s] 1413 except: 1414 pass 1415 return i 1416 1417 def toLower(self,c): 1418 return c.__class__.lower() 1419 1420 def traceIndent(self): 1421 print ' ' * self.traceDepth 1422 1423 def traceIn(self,rname): 1424 self.traceDepth += 1 1425 self.traceIndent() 1426 print "> lexer %s c== %s" % (rname,self.LA(1)) 1427 1428 def traceOut(self,rname): 1429 self.traceIndent() 1430 print "< lexer %s c== %s" % (rname,self.LA(1)) 1431 self.traceDepth -= 1 1432 1433 def uponEOF(self): 1434 pass 1435 1436 def append(self,c): 1437 if self.saveConsumedInput : 1438 self.text.append(c) 1439 1440 def commit(self): 1441 self.inputState.input.commit() 1442 1443 def consume(self): 1444 if not self.inputState.guessing: 1445 c = self.LA(1) 1446 if self.caseSensitive: 1447 self.append(c) 1448 else: 1449 # use input.LA(), not LA(), to get original case 1450 # CharScanner.LA() would toLower it. 
1451 c = self.inputState.input.LA(1) 1452 self.append(c) 1453 1454 if c and c in "\t": 1455 self.tab() 1456 else: 1457 self.inputState.column += 1 1458 self.inputState.input.consume() 1459 1460 ## Consume chars until one matches the given char 1461 def consumeUntil_char(self,c): 1462 while self.LA(1) != EOF_CHAR and self.LA(1) != c: 1463 self.consume() 1464 1465 ## Consume chars until one matches the given set 1466 def consumeUntil_bitset(self,bitset): 1467 while self.LA(1) != EOF_CHAR and not self.set.member(self.LA(1)): 1468 self.consume() 1469 1470 ### If symbol seen is EOF then generate and set token, otherwise 1471 ### throw exception. 1472 def default(self,la1): 1473 if not la1 : 1474 self.uponEOF() 1475 self._returnToken = self.makeToken(EOF_TYPE) 1476 else: 1477 self.raise_NoViableAlt(la1) 1478 1479 def filterdefault(self,la1,*args): 1480 if not la1: 1481 self.uponEOF() 1482 self._returnToken = self.makeToken(EOF_TYPE) 1483 return 1484 1485 if not args: 1486 self.consume() 1487 raise TryAgain() 1488 else: 1489 ### apply filter object 1490 self.commit(); 1491 try: 1492 func=args[0] 1493 args=args[1:] 1494 apply(func,args) 1495 except RecognitionException, e: 1496 ## catastrophic failure 1497 self.reportError(e); 1498 self.consume(); 1499 raise TryAgain() 1500 1501 def raise_NoViableAlt(self,la1=None): 1502 if not la1: la1 = self.LA(1) 1503 fname = self.getFilename() 1504 line = self.getLine() 1505 col = self.getColumn() 1506 raise NoViableAltForCharException(la1,fname,line,col) 1507 1508 def set_return_token(self,_create,_token,_ttype,_offset): 1509 if _create and not _token and (not _ttype == SKIP): 1510 string = self.text.getString(_offset) 1511 _token = self.makeToken(_ttype) 1512 _token.setText(string) 1513 self._returnToken = _token 1514 return _token 1515 1516###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1517### CharScannerIterator ### 1518###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1519 1520class 
CharScannerIterator: 1521 1522 def __init__(self,inst): 1523 if isinstance(inst,CharScanner): 1524 self.inst = inst 1525 return 1526 raise TypeError("CharScannerIterator requires CharScanner object") 1527 1528 def next(self): 1529 assert self.inst 1530 item = self.inst.nextToken() 1531 if not item or item.isEOF(): 1532 raise StopIteration() 1533 return item 1534 1535###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1536### BitSet ### 1537###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1538 1539### I'm assuming here that a long is 64bits. It appears however, that 1540### a long is of any size. That means we can use a single long as the 1541### bitset (!), ie. Python would do almost all the work (TBD). 1542 1543class BitSet(object): 1544 BITS = 64 1545 NIBBLE = 4 1546 LOG_BITS = 6 1547 MOD_MASK = BITS -1 1548 1549 def __init__(self,data=None): 1550 if not data: 1551 BitSet.__init__(self,[long(0)]) 1552 return 1553 if isinstance(data,int): 1554 BitSet.__init__(self,[long(data)]) 1555 return 1556 if isinstance(data,long): 1557 BitSet.__init__(self,[data]) 1558 return 1559 if not isinstance(data,list): 1560 raise TypeError("BitSet requires integer, long, or " + 1561 "list argument") 1562 for x in data: 1563 if not isinstance(x,long): 1564 raise TypeError(self,"List argument item is " + 1565 "not a long: %s" % (x)) 1566 self.data = data 1567 1568 def __str__(self): 1569 bits = len(self.data) * BitSet.BITS 1570 s = "" 1571 for i in xrange(0,bits): 1572 if self.at(i): 1573 s += "1" 1574 else: 1575 s += "o" 1576 if not ((i+1) % 10): 1577 s += '|%s|' % (i+1) 1578 return s 1579 1580 def __repr__(self): 1581 return str(self) 1582 1583 def member(self,item): 1584 if not item: 1585 return False 1586 1587 if isinstance(item,int): 1588 return self.at(item) 1589 1590 if not is_string_type(item): 1591 raise TypeError(self,"char or unichar expected: %s" % (item)) 1592 1593 ## char is a (unicode) string with at most lenght 1, ie. 
1594 ## a char. 1595 1596 if len(item) != 1: 1597 raise TypeError(self,"char expected: %s" % (item)) 1598 1599 ### handle ASCII/UNICODE char 1600 num = ord(item) 1601 1602 ### check whether position num is in bitset 1603 return self.at(num) 1604 1605 def wordNumber(self,bit): 1606 return bit >> BitSet.LOG_BITS 1607 1608 def bitMask(self,bit): 1609 pos = bit & BitSet.MOD_MASK ## bit mod BITS 1610 return (1L << pos) 1611 1612 def set(self,bit,on=True): 1613 # grow bitset as required (use with care!) 1614 i = self.wordNumber(bit) 1615 mask = self.bitMask(bit) 1616 if i>=len(self.data): 1617 d = i - len(self.data) + 1 1618 for x in xrange(0,d): 1619 self.data.append(0L) 1620 assert len(self.data) == i+1 1621 if on: 1622 self.data[i] |= mask 1623 else: 1624 self.data[i] &= (~mask) 1625 1626 ### make add an alias for set 1627 add = set 1628 1629 def off(self,bit,off=True): 1630 self.set(bit,not off) 1631 1632 def at(self,bit): 1633 i = self.wordNumber(bit) 1634 v = self.data[i] 1635 m = self.bitMask(bit) 1636 return v & m 1637 1638 1639###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1640### some further funcs ### 1641###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1642 1643def illegalarg_ex(func): 1644 raise ValueError( 1645 "%s is only valid if parser is built for debugging" % 1646 (func.func_name)) 1647 1648def runtime_ex(func): 1649 raise RuntimeException( 1650 "%s is only valid if parser is built for debugging" % 1651 (func.func_name)) 1652 1653###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1654### TokenBuffer ### 1655###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1656 1657class TokenBuffer(object): 1658 def __init__(self,stream): 1659 self.input = stream 1660 self.nMarkers = 0 1661 self.markerOffset = 0 1662 self.numToConsume = 0 1663 self.queue = Queue() 1664 1665 def reset(self) : 1666 self.nMarkers = 0 1667 self.markerOffset = 0 1668 self.numToConsume = 0 1669 
self.queue.reset() 1670 1671 def consume(self) : 1672 self.numToConsume += 1 1673 1674 def fill(self, amount): 1675 self.syncConsume() 1676 while self.queue.length() < (amount + self.markerOffset): 1677 self.queue.append(self.input.nextToken()) 1678 1679 def getInput(self): 1680 return self.input 1681 1682 def LA(self,k) : 1683 self.fill(k) 1684 return self.queue.elementAt(self.markerOffset + k - 1).type 1685 1686 def LT(self,k) : 1687 self.fill(k) 1688 return self.queue.elementAt(self.markerOffset + k - 1) 1689 1690 def mark(self) : 1691 self.syncConsume() 1692 self.nMarkers += 1 1693 return self.markerOffset 1694 1695 def rewind(self,mark) : 1696 self.syncConsume() 1697 self.markerOffset = mark 1698 self.nMarkers -= 1 1699 1700 def syncConsume(self) : 1701 while self.numToConsume > 0: 1702 if self.nMarkers > 0: 1703 # guess mode -- leave leading characters and bump offset. 1704 self.markerOffset += 1 1705 else: 1706 # normal mode -- remove first character 1707 self.queue.removeFirst() 1708 self.numToConsume -= 1 1709 1710 def __str__(self): 1711 return "(%s,%s,%s,%s,%s)" % ( 1712 self.input, 1713 self.nMarkers, 1714 self.markerOffset, 1715 self.numToConsume, 1716 self.queue) 1717 1718 def __repr__(self): 1719 return str(self) 1720 1721###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1722### ParserSharedInputState ### 1723###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1724 1725class ParserSharedInputState(object): 1726 1727 def __init__(self): 1728 self.input = None 1729 self.reset() 1730 1731 def reset(self): 1732 self.guessing = 0 1733 self.filename = None 1734 if self.input: 1735 self.input.reset() 1736 1737###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1738### Parser ### 1739###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1740 1741class Parser(object): 1742 1743 def __init__(self, *args, **kwargs): 1744 self.tokenNames = None 1745 self.returnAST = None 1746 
self.astFactory = None 1747 self.tokenTypeToASTClassMap = {} 1748 self.ignoreInvalidDebugCalls = False 1749 self.traceDepth = 0 1750 if not args: 1751 self.inputState = ParserSharedInputState() 1752 return 1753 arg0 = args[0] 1754 assert isinstance(arg0,ParserSharedInputState) 1755 self.inputState = arg0 1756 return 1757 1758 def getTokenTypeToASTClassMap(self): 1759 return self.tokenTypeToASTClassMap 1760 1761 1762 def addMessageListener(self, l): 1763 if not self.ignoreInvalidDebugCalls: 1764 illegalarg_ex(addMessageListener) 1765 1766 def addParserListener(self,l) : 1767 if (not self.ignoreInvalidDebugCalls) : 1768 illegalarg_ex(addParserListener) 1769 1770 def addParserMatchListener(self, l) : 1771 if (not self.ignoreInvalidDebugCalls) : 1772 illegalarg_ex(addParserMatchListener) 1773 1774 def addParserTokenListener(self, l) : 1775 if (not self.ignoreInvalidDebugCalls): 1776 illegalarg_ex(addParserTokenListener) 1777 1778 def addSemanticPredicateListener(self, l) : 1779 if (not self.ignoreInvalidDebugCalls): 1780 illegalarg_ex(addSemanticPredicateListener) 1781 1782 def addSyntacticPredicateListener(self, l) : 1783 if (not self.ignoreInvalidDebugCalls): 1784 illegalarg_ex(addSyntacticPredicateListener) 1785 1786 def addTraceListener(self, l) : 1787 if (not self.ignoreInvalidDebugCalls): 1788 illegalarg_ex(addTraceListener) 1789 1790 def consume(self): 1791 raise NotImplementedError() 1792 1793 def _consumeUntil_type(self,tokenType): 1794 while self.LA(1) != EOF_TYPE and self.LA(1) != tokenType: 1795 self.consume() 1796 1797 def _consumeUntil_bitset(self, set): 1798 while self.LA(1) != EOF_TYPE and not set.member(self.LA(1)): 1799 self.consume() 1800 1801 def consumeUntil(self,arg): 1802 if isinstance(arg,int): 1803 self._consumeUntil_type(arg) 1804 else: 1805 self._consumeUntil_bitset(arg) 1806 1807 def defaultDebuggingSetup(self): 1808 pass 1809 1810 def getAST(self) : 1811 return self.returnAST 1812 1813 def getASTFactory(self) : 1814 return self.astFactory 
1815 1816 def getFilename(self) : 1817 return self.inputState.filename 1818 1819 def getInputState(self) : 1820 return self.inputState 1821 1822 def setInputState(self, state) : 1823 self.inputState = state 1824 1825 def getTokenName(self,num) : 1826 return self.tokenNames[num] 1827 1828 def getTokenNames(self) : 1829 return self.tokenNames 1830 1831 def isDebugMode(self) : 1832 return self.false 1833 1834 def LA(self, i): 1835 raise NotImplementedError() 1836 1837 def LT(self, i): 1838 raise NotImplementedError() 1839 1840 def mark(self): 1841 return self.inputState.input.mark() 1842 1843 def _match_int(self,t): 1844 if (self.LA(1) != t): 1845 raise MismatchedTokenException( 1846 self.tokenNames, self.LT(1), t, False, self.getFilename()) 1847 else: 1848 self.consume() 1849 1850 def _match_set(self, b): 1851 if (not b.member(self.LA(1))): 1852 raise MismatchedTokenException( 1853 self.tokenNames,self.LT(1), b, False, self.getFilename()) 1854 else: 1855 self.consume() 1856 1857 def match(self,set) : 1858 if isinstance(set,int): 1859 self._match_int(set) 1860 return 1861 if isinstance(set,BitSet): 1862 self._match_set(set) 1863 return 1864 raise TypeError("Parser.match requires integer ot BitSet argument") 1865 1866 def matchNot(self,t): 1867 if self.LA(1) == t: 1868 raise MismatchedTokenException( 1869 tokenNames, self.LT(1), t, True, self.getFilename()) 1870 else: 1871 self.consume() 1872 1873 def removeMessageListener(self, l) : 1874 if (not self.ignoreInvalidDebugCalls): 1875 runtime_ex(removeMessageListener) 1876 1877 def removeParserListener(self, l) : 1878 if (not self.ignoreInvalidDebugCalls): 1879 runtime_ex(removeParserListener) 1880 1881 def removeParserMatchListener(self, l) : 1882 if (not self.ignoreInvalidDebugCalls): 1883 runtime_ex(removeParserMatchListener) 1884 1885 def removeParserTokenListener(self, l) : 1886 if (not self.ignoreInvalidDebugCalls): 1887 runtime_ex(removeParserTokenListener) 1888 1889 def removeSemanticPredicateListener(self, l) : 
1890 if (not self.ignoreInvalidDebugCalls): 1891 runtime_ex(removeSemanticPredicateListener) 1892 1893 def removeSyntacticPredicateListener(self, l) : 1894 if (not self.ignoreInvalidDebugCalls): 1895 runtime_ex(removeSyntacticPredicateListener) 1896 1897 def removeTraceListener(self, l) : 1898 if (not self.ignoreInvalidDebugCalls): 1899 runtime_ex(removeTraceListener) 1900 1901 def reportError(self,x) : 1902 fmt = "syntax error:" 1903 f = self.getFilename() 1904 if f: 1905 fmt = ("%s:" % f) + fmt 1906 if isinstance(x,Token): 1907 line = x.getColumn() 1908 col = x.getLine() 1909 text = x.getText() 1910 fmt = fmt + 'unexpected symbol at line %s (column %s) : "%s"' 1911 print >>sys.stderr, fmt % (line,col,text) 1912 else: 1913 print >>sys.stderr, fmt,str(x) 1914 1915 def reportWarning(self,s): 1916 f = self.getFilename() 1917 if f: 1918 print "%s:warning: %s" % (f,str(x)) 1919 else: 1920 print "warning: %s" % (str(x)) 1921 1922 def rewind(self, pos) : 1923 self.inputState.input.rewind(pos) 1924 1925 def setASTFactory(self, f) : 1926 self.astFactory = f 1927 1928 def setASTNodeClass(self, cl) : 1929 self.astFactory.setASTNodeType(cl) 1930 1931 def setASTNodeType(self, nodeType) : 1932 self.setASTNodeClass(nodeType) 1933 1934 def setDebugMode(self, debugMode) : 1935 if (not self.ignoreInvalidDebugCalls): 1936 runtime_ex(setDebugMode) 1937 1938 def setFilename(self, f) : 1939 self.inputState.filename = f 1940 1941 def setIgnoreInvalidDebugCalls(self, value) : 1942 self.ignoreInvalidDebugCalls = value 1943 1944 def setTokenBuffer(self, t) : 1945 self.inputState.input = t 1946 1947 def traceIndent(self): 1948 print " " * self.traceDepth 1949 1950 def traceIn(self,rname): 1951 self.traceDepth += 1 1952 self.trace("> ", rname) 1953 1954 def traceOut(self,rname): 1955 self.trace("< ", rname) 1956 self.traceDepth -= 1 1957 1958 ### wh: moved from ASTFactory to Parser 1959 def addASTChild(self,currentAST, child): 1960 if not child: 1961 return 1962 if not currentAST.root: 1963 
currentAST.root = child 1964 elif not currentAST.child: 1965 currentAST.root.setFirstChild(child) 1966 else: 1967 currentAST.child.setNextSibling(child) 1968 currentAST.child = child 1969 currentAST.advanceChildToEnd() 1970 1971 ### wh: moved from ASTFactory to Parser 1972 def makeASTRoot(self,currentAST,root) : 1973 if root: 1974 ### Add the current root as a child of new root 1975 root.addChild(currentAST.root) 1976 ### The new current child is the last sibling of the old root 1977 currentAST.child = currentAST.root 1978 currentAST.advanceChildToEnd() 1979 ### Set the new root 1980 currentAST.root = root 1981 1982###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1983### LLkParser ### 1984###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 1985 1986class LLkParser(Parser): 1987 1988 def __init__(self, *args, **kwargs): 1989 try: 1990 arg1 = args[0] 1991 except: 1992 arg1 = 1 1993 1994 if isinstance(arg1,int): 1995 super(LLkParser,self).__init__() 1996 self.k = arg1 1997 return 1998 1999 if isinstance(arg1,ParserSharedInputState): 2000 super(LLkParser,self).__init__(arg1) 2001 self.set_k(1,*args) 2002 return 2003 2004 if isinstance(arg1,TokenBuffer): 2005 super(LLkParser,self).__init__() 2006 self.setTokenBuffer(arg1) 2007 self.set_k(1,*args) 2008 return 2009 2010 if isinstance(arg1,TokenStream): 2011 super(LLkParser,self).__init__() 2012 tokenBuf = TokenBuffer(arg1) 2013 self.setTokenBuffer(tokenBuf) 2014 self.set_k(1,*args) 2015 return 2016 2017 ### unknown argument 2018 raise TypeError("LLkParser requires integer, " + 2019 "ParserSharedInputStream or TokenStream argument") 2020 2021 def consume(self): 2022 self.inputState.input.consume() 2023 2024 def LA(self,i): 2025 return self.inputState.input.LA(i) 2026 2027 def LT(self,i): 2028 return self.inputState.input.LT(i) 2029 2030 def set_k(self,index,*args): 2031 try: 2032 self.k = args[index] 2033 except: 2034 self.k = 1 2035 2036 def trace(self,ee,rname): 2037 print 
type(self) 2038 self.traceIndent() 2039 guess = "" 2040 if self.inputState.guessing > 0: 2041 guess = " [guessing]" 2042 print(ee + rname + guess) 2043 for i in xrange(1,self.k+1): 2044 if i != 1: 2045 print(", ") 2046 if self.LT(i) : 2047 v = self.LT(i).getText() 2048 else: 2049 v = "null" 2050 print "LA(%s) == %s" % (i,v) 2051 print("\n") 2052 2053 def traceIn(self,rname): 2054 self.traceDepth += 1; 2055 self.trace("> ", rname); 2056 2057 def traceOut(self,rname): 2058 self.trace("< ", rname); 2059 self.traceDepth -= 1; 2060 2061###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2062### TreeParserSharedInputState ### 2063###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2064 2065class TreeParserSharedInputState(object): 2066 def __init__(self): 2067 self.guessing = 0 2068 2069###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2070### TreeParser ### 2071###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2072 2073class TreeParser(object): 2074 2075 def __init__(self, *args, **kwargs): 2076 self.inputState = TreeParserSharedInputState() 2077 self._retTree = None 2078 self.tokenNames = [] 2079 self.returnAST = None 2080 self.astFactory = ASTFactory() 2081 self.traceDepth = 0 2082 2083 def getAST(self): 2084 return self.returnAST 2085 2086 def getASTFactory(self): 2087 return self.astFactory 2088 2089 def getTokenName(self,num) : 2090 return self.tokenNames[num] 2091 2092 def getTokenNames(self): 2093 return self.tokenNames 2094 2095 def match(self,t,set) : 2096 assert isinstance(set,int) or isinstance(set,BitSet) 2097 if not t or t == ASTNULL: 2098 raise MismatchedTokenException(self.getTokenNames(), t,set, False) 2099 2100 if isinstance(set,int) and t.getType() != set: 2101 raise MismatchedTokenException(self.getTokenNames(), t,set, False) 2102 2103 if isinstance(set,BitSet) and not set.member(t.getType): 2104 raise MismatchedTokenException(self.getTokenNames(), t,set, False) 2105 2106 
def matchNot(self,t, ttype) : 2107 if not t or (t == ASTNULL) or (t.getType() == ttype): 2108 raise MismatchedTokenException(getTokenNames(), t, ttype, True) 2109 2110 def reportError(self,ex): 2111 print >>sys.stderr,"error:",ex 2112 2113 def reportWarning(self, s): 2114 print "warning:",s 2115 2116 def setASTFactory(self,f): 2117 self.astFactory = f 2118 2119 def setASTNodeType(self,nodeType): 2120 self.setASTNodeClass(nodeType) 2121 2122 def setASTNodeClass(self,nodeType): 2123 self.astFactory.setASTNodeType(nodeType) 2124 2125 def traceIndent(self): 2126 print " " * self.traceDepth 2127 2128 def traceIn(self,rname,t): 2129 self.traceDepth += 1 2130 self.traceIndent() 2131 print("> " + rname + "(" + 2132 ifelse(t,str(t),"null") + ")" + 2133 ifelse(self.inputState.guessing>0,"[guessing]","")) 2134 2135 def traceOut(self,rname,t): 2136 self.traceIndent() 2137 print("< " + rname + "(" + 2138 ifelse(t,str(t),"null") + ")" + 2139 ifelse(self.inputState.guessing>0,"[guessing]","")) 2140 self.traceDepth -= 1 2141 2142 ### wh: moved from ASTFactory to TreeParser 2143 def addASTChild(self,currentAST, child): 2144 if not child: 2145 return 2146 if not currentAST.root: 2147 currentAST.root = child 2148 elif not currentAST.child: 2149 currentAST.root.setFirstChild(child) 2150 else: 2151 currentAST.child.setNextSibling(child) 2152 currentAST.child = child 2153 currentAST.advanceChildToEnd() 2154 2155 ### wh: moved from ASTFactory to TreeParser 2156 def makeASTRoot(self,currentAST,root): 2157 if root: 2158 ### Add the current root as a child of new root 2159 root.addChild(currentAST.root) 2160 ### The new current child is the last sibling of the old root 2161 currentAST.child = currentAST.root 2162 currentAST.advanceChildToEnd() 2163 ### Set the new root 2164 currentAST.root = root 2165 2166###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2167### funcs to work on trees ### 2168###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2169 
def rightmost(ast):
    """Return the last node of the sibling list starting at ast; an
    empty value is returned as it is."""
    if ast:
        while(ast.right):
            ast = ast.right
    return ast

def cmptree(s,t,partial):
    """Compare the sibling lists s and t including all children.

    If partial is false both lists must be of same shape, otherwise
    t only has to be a leading part of s.
    """
    while(s and t):
        ### as a quick optimization, check roots first.
        if not s.equals(t):
            return False

        ### if roots match, do full list match test on children.
        if not cmptree(s.getFirstChild(),t.getFirstChild(),partial):
            return False

        s = s.getNextSibling()
        t = t.getNextSibling()

    ### at least one list is used up now
    if partial:
        return not t
    return not s and not t

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                               AST                              ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class AST(object):
    """Interface of a tree node. All methods are stubs which do
    nothing or return a fixed value."""

    def __init__(self):
        pass

    def addChild(self, c):
        pass

    def equals(self, t):
        return False

    def equalsList(self, t):
        return False

    def equalsListPartial(self, t):
        return False

    def equalsTree(self, t):
        return False

    def equalsTreePartial(self, t):
        return False

    def findAll(self, tree):
        return None

    def findAllPartial(self, subtree):
        return None

    def getFirstChild(self):
        return self

    def getNextSibling(self):
        return self

    def getText(self):
        return ""

    def getType(self):
        return INVALID_TYPE

    def getLine(self):
        return 0

    def getColumn(self):
        return 0

    def getNumberOfChildren(self):
        return 0

    ### There had been two definitions, initialize(t,txt) followed
    ### by initialize(t), the second one hiding the first. An
    ### optional argument serves both ways of calling.
    def initialize(self, t, txt=None):
        pass

    def setFirstChild(self, c):
        pass

    def setNextSibling(self, n):
        pass

    def setText(self, text):
        pass

    def setType(self, ttype):
        pass

    def toString(self):
        ## 'return' was missing, so that str() on a node failed
        return self.getText()

    __str__ = toString

    def toStringList(self):
        return self.getText()

    def toStringTree(self):
        return self.getText()

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                           ASTNULLType                          ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

### There is only one instance of this class **/
class ASTNULLType(AST):
    def __init__(self):
        AST.__init__(self)

    def getText(self):
        return "<ASTNULL>"

    def getType(self):
        return NULL_TREE_LOOKAHEAD


###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                             BaseAST                            ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class BaseAST(AST):
    """Tree node linked by 'down' (first child) and 'right' (next
    sibling). Text and type are to be provided by subclasses."""

    ## set by setVerboseStringConversion()
    verboseStringConversion = False
    tokenNames = None

    def __init__(self):
        self.down  = None ## kid
        self.right = None ## sibling

    def addChild(self,node):
        """Append node to the children of this node; an empty value
        is ignored."""
        if node:
            t = rightmost(self.down)
            if t:
                t.right = node
            else:
                assert not self.down
                self.down = node

    def getNumberOfChildren(self):
        t = self.down
        n = 0
        while t:
            n += 1
            t = t.right
        return n

    def doWorkForFindAll(self,v,target,partialMatch):
        """Append to list v each node of this sibling list, and of
        all subtrees, which matches target."""
        sibling = self

        while sibling:
            if partialMatch:
                matched = sibling.equalsTreePartial(target)
            else:
                matched = sibling.equalsTree(target)
            if matched:
                v.append(sibling)

            ### regardless of match or not, check any children for matches
            kid = sibling.getFirstChild()
            if kid:
                kid.doWorkForFindAll(v,target,partialMatch)

            sibling = sibling.getNextSibling()

    ### Is node t equal to 'self' in terms of token type and text?
    def equals(self,t):
        if not t:
            return False
        return self.getText() == t.getText() and self.getType() == t.getType()

    ### Is t an exact structural and equals() match of this tree. The
    ### 'self' reference is considered the start of a sibling list.
    ###
    def equalsList(self, t):
        return cmptree(self, t, partial=False)

    ### Is 't' a subtree of this list?
    ### The siblings of the root are NOT ignored.
    ###
    def equalsListPartial(self,t):
        return cmptree(self,t,partial=True)

    ### Is tree rooted at 'self' equal to 't'? The siblings
    ### of 'self' are ignored.
    ###
    def equalsTree(self, t):
        return self.equals(t) and \
               cmptree(self.getFirstChild(), t.getFirstChild(), partial=False)

    ### Is 't' a subtree of the tree rooted at 'self'? The siblings
    ### of 'self' are ignored.
    ###
    def equalsTreePartial(self, t):
        if not t:
            return True
        return self.equals(t) and cmptree(
           self.getFirstChild(), t.getFirstChild(), partial=True)

    ### Walk the tree looking for all exact subtree matches. Return
    ### the list of subtree roots found herein, or None if target
    ### is empty.
    def findAll(self,target):
        roots = []

        ### the empty tree cannot result in an enumeration
        if not target:
            return None
        # find all matches recursively
        self.doWorkForFindAll(roots, target, False)
        return roots

    ### Walk the tree looking for all subtrees. Return the list of
    ### subtree roots found herein, or None if sub is empty.
    def findAllPartial(self,sub):
        roots = []

        ### the empty tree cannot result in an enumeration
        if not sub:
            return None

        self.doWorkForFindAll(roots, sub, True)  ### find all matches recursively
        return roots

    ### Get the first child of this node; None if no children
    def getFirstChild(self):
        return self.down

    ### Get the next sibling in line after this one
    def getNextSibling(self):
        return self.right

    ### Get the token text for this node
    def getText(self):
        return ""

    ### Get the token type for this node
    def getType(self):
        return 0

    def getLine(self):
        return 0

    def getColumn(self):
        return 0

    ### Remove all children */
    def removeChildren(self):
        self.down = None

    def setFirstChild(self,c):
        self.down = c

    def setNextSibling(self, n):
        self.right = n

    ### Set the token text for this node
    def setText(self, text):
        pass

    ### Set the token type for this node
    def setType(self, ttype):
        pass

    ### static
    ### Both values are kept as attributes of class BaseAST; plain
    ### assignments made local variables which were lost at once.
    def setVerboseStringConversion(verbose,names):
        BaseAST.verboseStringConversion = verbose
        BaseAST.tokenNames = names
    setVerboseStringConversion = staticmethod(setVerboseStringConversion)

    ### Return an array of strings that maps token ID to its text.
    ##  @since 2.7.3
    ### static as well: the method takes no 'self' and could not be
    ### called before.
    def getTokenNames():
        return BaseAST.tokenNames
    getTokenNames = staticmethod(getTokenNames)

    def toString(self):
        return self.getText()

    ### return tree as lisp string - sibling included
    def toStringList(self):
        ts = self.toStringTree()
        sib = self.getNextSibling()
        if sib:
            ts += sib.toStringList()
        return ts

    __str__ = toStringList

    ### return tree as string - siblings ignored
    def toStringTree(self):
        ts = ""
        kid = self.getFirstChild()
        if kid:
            ts += " ("
        ts += " " + self.toString()
        if kid:
            ts += kid.toStringList()
            ts += " )"
        return ts

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                            CommonAST                           ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

### Common AST node implementation
class CommonAST(BaseAST):
    def __init__(self,token=None):
        super(CommonAST,self).__init__()
        self.ttype = INVALID_TYPE
        self.text  = "<no text>"
        self.line  = 0
        self.column= 0
        self.initialize(token)
        #assert self.text

    ### Get the token text for this node
    def getText(self):
        return self.text

    ### Get the token type for this node
    def getType(self):
        return self.ttype

    ### Get the line for this node
    def getLine(self):
        return self.line

    ### Get the column for this node
    def getColumn(self):
        return self.column

    def initialize(self,*args):
        """Set up the node from (type, text) or from another AST or
        Token, of which text, type, line and column are taken. Any
        other first argument leaves the node as it is."""
        if not args:
            return

        arg0 = args[0]

        if isinstance(arg0,int):
            arg1 = args[1]
            self.setType(arg0)
            self.setText(arg1)
            return

        if isinstance(arg0,AST) or isinstance(arg0,Token):
            self.setText(arg0.getText())
            self.setType(arg0.getType())
            self.line = arg0.getLine()
            self.column = arg0.getColumn()
            return

    ### Set the token text for this node
    def setText(self,text_):
        assert is_string_type(text_)
        self.text = text_

    ### Set the token type for this node
    def setType(self,ttype_):
        assert isinstance(ttype_,int)
        self.ttype = ttype_

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                    CommonASTWithHiddenTokens                   ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class CommonASTWithHiddenTokens(CommonAST):

    def __init__(self,*args):
        ## Defaults first: CommonAST.__init__ calls initialize(),
        ## which takes the hidden tokens from a token given. Setting
        ## the defaults afterwards wiped them out again.
        self.hiddenBefore = None
        self.hiddenAfter  = None
        CommonAST.__init__(self,*args)

    def getHiddenAfter(self):
        return self.hiddenAfter

    def getHiddenBefore(self):
        return self.hiddenBefore

    def initialize(self,*args):
        CommonAST.initialize(self,*args)
        if args and isinstance(args[0],Token):
            assert isinstance(args[0],CommonHiddenStreamToken)
            self.hiddenBefore = args[0].getHiddenBefore()
            self.hiddenAfter  = args[0].getHiddenAfter()

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
###                             ASTPair                            ###
###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###

class ASTPair(object):
    def __init__(self):
        self.root = None          ### current root of tree
        self.child = None         ### current child to which siblings are added

    ### Make sure that child is the last sibling */
    def advanceChildToEnd(self):
        if self.child:
            while self.child.getNextSibling():
                self.child = self.child.getNextSibling()

    ### Copy an ASTPair.
Don't call it clone() because we want type-safety */ 2575 def copy(self): 2576 tmp = ASTPair() 2577 tmp.root = self.root 2578 tmp.child = self.child 2579 return tmp 2580 2581 def toString(self): 2582 r = ifelse(not root,"null",self.root.getText()) 2583 c = ifelse(not child,"null",self.child.getText()) 2584 return "[%s,%s]" % (r,c) 2585 2586 __str__ = toString 2587 __repr__ = toString 2588 2589 2590###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2591### ASTFactory ### 2592###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2593 2594class ASTFactory(object): 2595 def __init__(self,table=None): 2596 self._class = None 2597 self._classmap = ifelse(table,table,None) 2598 2599 def create(self,*args): 2600 if not args: 2601 return self.create(INVALID_TYPE) 2602 2603 arg0 = args[0] 2604 arg1 = None 2605 arg2 = None 2606 2607 try: 2608 arg1 = args[1] 2609 arg2 = args[2] 2610 except: 2611 pass 2612 2613 # ctor(int) 2614 if isinstance(arg0,int) and not arg2: 2615 ### get class for 'self' type 2616 c = self.getASTNodeType(arg0) 2617 t = self.create(c) 2618 if t: 2619 t.initialize(arg0, ifelse(arg1,arg1,"")) 2620 return t 2621 2622 # ctor(int,something) 2623 if isinstance(arg0,int) and arg2: 2624 t = self.create(arg2) 2625 if t: 2626 t.initialize(arg0,arg1) 2627 return t 2628 2629 # ctor(AST) 2630 if isinstance(arg0,AST): 2631 t = self.create(arg0.getType()) 2632 if t: 2633 t.initialize(arg0) 2634 return t 2635 2636 # ctor(token) 2637 if isinstance(arg0,Token) and not arg1: 2638 ttype = arg0.getType() 2639 assert isinstance(ttype,int) 2640 t = self.create(ttype) 2641 if t: 2642 t.initialize(arg0) 2643 return t 2644 2645 # ctor(token,class) 2646 if isinstance(arg0,Token) and arg1: 2647 assert isinstance(arg1,type) 2648 assert issubclass(arg1,AST) 2649 # this creates instance of 'arg1' using 'arg0' as 2650 # argument. Wow, that's magic! 
2651 t = arg1(arg0) 2652 assert t and isinstance(t,AST) 2653 return t 2654 2655 # ctor(class) 2656 if isinstance(arg0,type): 2657 ### next statement creates instance of type (!) 2658 t = arg0() 2659 assert isinstance(t,AST) 2660 return t 2661 2662 2663 def setASTNodeClass(self,className=None): 2664 if not className: 2665 return 2666 assert isinstance(className,type) 2667 assert issubclass(className,AST) 2668 self._class = className 2669 2670 ### kind of misnomer - use setASTNodeClass instead. 2671 setASTNodeType = setASTNodeClass 2672 2673 def getASTNodeClass(self): 2674 return self._class 2675 2676 2677 2678 def getTokenTypeToASTClassMap(self): 2679 return self._classmap 2680 2681 def setTokenTypeToASTClassMap(self,amap): 2682 self._classmap = amap 2683 2684 def error(self, e): 2685 import sys 2686 print >> sys.stderr, e 2687 2688 def setTokenTypeASTNodeType(self, tokenType, className): 2689 """ 2690 Specify a mapping between a token type and a (AST) class. 2691 """ 2692 if not self._classmap: 2693 self._classmap = {} 2694 2695 if not className: 2696 try: 2697 del self._classmap[tokenType] 2698 except: 2699 pass 2700 else: 2701 ### here we should also perform actions to ensure that 2702 ### a. class can be loaded 2703 ### b. class is a subclass of AST 2704 ### 2705 assert isinstance(className,type) 2706 assert issubclass(className,AST) ## a & b 2707 ### enter the class 2708 self._classmap[tokenType] = className 2709 2710 def getASTNodeType(self,tokenType): 2711 """ 2712 For a given token type return the AST node type. First we 2713 lookup a mapping table, second we try _class 2714 and finally we resolve to "antlr.CommonAST". 
2715 """ 2716 2717 # first 2718 if self._classmap: 2719 try: 2720 c = self._classmap[tokenType] 2721 if c: 2722 return c 2723 except: 2724 pass 2725 # second 2726 if self._class: 2727 return self._class 2728 2729 # default 2730 return CommonAST 2731 2732 ### methods that have been moved to file scope - just listed 2733 ### here to be somewhat consistent with original API 2734 def dup(self,t): 2735 return antlr.dup(t,self) 2736 2737 def dupList(self,t): 2738 return antlr.dupList(t,self) 2739 2740 def dupTree(self,t): 2741 return antlr.dupTree(t,self) 2742 2743 ### methods moved to other classes 2744 ### 1. makeASTRoot -> Parser 2745 ### 2. addASTChild -> Parser 2746 2747 ### non-standard: create alias for longish method name 2748 maptype = setTokenTypeASTNodeType 2749 2750###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2751### ASTVisitor ### 2752###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2753 2754class ASTVisitor(object): 2755 def __init__(self,*args): 2756 pass 2757 2758 def visit(self,ast): 2759 pass 2760 2761###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2762### static methods and variables ### 2763###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### 2764 2765ASTNULL = ASTNULLType() 2766 2767### wh: moved from ASTFactory as there's nothing ASTFactory-specific 2768### in this method. 
### Build a tree from 'nodes': the first node becomes the root, the
### remaining ones are chained up as its children.  Empty (false)
### nodes are skipped; if the root itself is empty, the first
### non-empty node takes its place and the others become its
### siblings.  Returns the root, or None when called without nodes.
def make(*nodes):
    if not nodes:
        return None

    ### every non-empty node has to be an AST
    for candidate in nodes:
        if candidate:
            assert isinstance(candidate,AST)

    root = nodes[0]
    tail = None
    if root:
        ### don't leave any old pointers set
        root.setFirstChild(None)

    for child in nodes[1:]:
        if not child:
            continue
        if not root:
            root = tail = child
        elif not tail:
            root.setFirstChild(child)
            tail = root.getFirstChild()
        else:
            tail.setNextSibling(child)
            tail = tail.getNextSibling()

        ### Chase tail to last sibling
        while tail.getNextSibling():
            tail = tail.getNextSibling()
    return root

### Duplicate the single node 't' (no children, no siblings) using
### 'factory' to create the copy.  Returns None for an empty 't';
### raises TypeError if there is a node but no factory.
def dup(t,factory):
    if not t:
        return None
    if not factory:
        raise TypeError("dup function requires ASTFactory argument")

    clone = factory.create(t.__class__)
    clone.initialize(t)
    return clone

### Duplicate 't' including its children and all of its siblings.
def dupList(t,factory):
    head = dupTree(t,factory)
    last = head
    while t:
        ## for each sibling of the root
        t = t.getNextSibling()
        sibling_copy = dupTree(t,factory)
        last.setNextSibling(sibling_copy)
        last = last.getNextSibling()
    return head

### Duplicate 't' including its children; siblings of 't' are ignored.
def dupTree(t,factory):
    clone = dup(t,factory)
    if not t:
        return clone
    kids = dupList(t.getFirstChild(),factory)
    clone.setFirstChild(kids)
    return clone

###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
### $Id: antlr.py,v 1.1.1.1 2005/02/02 10:24:36 geronimo Exp $

# Local Variables:    ***
# mode: python        ***
# py-indent-offset: 4 ***
# End:                ***