1# module pyparsing.py 2# 3# Copyright (c) 2003-2011 Paul T. McGuire 4# 5# Permission is hereby granted, free of charge, to any person obtaining 6# a copy of this software and associated documentation files (the 7# "Software"), to deal in the Software without restriction, including 8# without limitation the rights to use, copy, modify, merge, publish, 9# distribute, sublicense, and/or sell copies of the Software, and to 10# permit persons to whom the Software is furnished to do so, subject to 11# the following conditions: 12# 13# The above copyright notice and this permission notice shall be 14# included in all copies or substantial portions of the Software. 15# 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23# 24#from __future__ import generators 25 26from __future__ import division, print_function 27from __future__ import absolute_import 28from __future__ import unicode_literals 29 30__doc__ = \ 31""" 32pyparsing module - Classes and methods to define and execute parsing grammars 33 34The pyparsing module is an alternative approach to creating and executing simple grammars, 35vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you 36don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 37provides a library of classes that you use to construct the grammar directly in Python. 38 39Here is a program to parse "Hello, World!" 
(or any greeting of the form C{"<salutation>, <addressee>!"}):: 40 41 from pyparsing import Word, alphas 42 43 # define grammar of a greeting 44 greet = Word( alphas ) + "," + Word( alphas ) + "!" 45 46 hello = "Hello, World!" 47 print hello, "->", greet.parseString( hello ) 48 49The program outputs the following:: 50 51 Hello, World! -> ['Hello', ',', 'World', '!'] 52 53The Python representation of the grammar is quite readable, owing to the self-explanatory 54class names, and the use of '+', '|' and '^' operators. 55 56The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 57object with named attributes. 58 59The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 60 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 61 - quoted strings 62 - embedded comments 63""" 64 65__version__ = "1.5.6" 66__versionTime__ = "26 June 2011 10:53" 67__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 68 69import string 70from weakref import ref as wkref 71import copy 72import sys 73import warnings 74import re 75import sre_constants 76#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 77 78__all__ = [ 79'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 80'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 81'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 82'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 83'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 84'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 85'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 86'alphanums', 'alphas', 'alphas8bit', 
'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 87'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 88'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 89'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 90'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 91'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 92'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 93'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 94'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 95'indentedBlock', 'originalTextFor', 96] 97 98""" 99Detect if we are running version 3.X and make appropriate changes 100Robert A. Clark 101""" 102_PY3K = sys.version_info[0] > 2 103if _PY3K: 104 _MAX_INT = sys.maxsize 105 str = str 106 chr = chr 107 _ustr = str 108 alphas = string.ascii_lowercase + string.ascii_uppercase 109else: 110 _MAX_INT = sys.maxsize 111 range = xrange 112 set = lambda s : dict( [(c,0) for c in s] ) 113 alphas = string.lowercase + string.uppercase 114 115 def _ustr(obj): 116 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 117 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 118 then < returns the unicode object | encodes it with the default encoding | ... >. 119 """ 120 if isinstance(obj,str): 121 return obj 122 123 try: 124 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 125 # it won't break any existing code. 126 return str(obj) 127 128 except UnicodeEncodeError: 129 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 130 # state that "The return value must be a string object". 
However, does a 131 # unicode object (being a subclass of basestring) count as a "string 132 # object"? 133 # If so, then return a unicode object: 134 return str(obj) 135 # Else encode it... but how? There are many choices... :) 136 # Replace unprintables with escape codes? 137 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 138 # Replace unprintables with question marks? 139 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 140 # ... 141 142 alphas = string.lowercase + string.uppercase 143 144# build list of single arg builtins, tolerant of Python version, that can be used as parse actions 145singleArgBuiltins = [] 146import builtins 147for fname in "sum len enumerate sorted reversed list tuple set any all".split(): 148 try: 149 singleArgBuiltins.append(getattr(__builtin__,fname)) 150 except AttributeError: 151 continue 152 153def _xml_escape(data): 154 """Escape &, <, >, ", ', etc. in a string of data.""" 155 156 # ampersand must be replaced first 157 from_symbols = '&><"\'' 158 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 159 for from_,to_ in zip(from_symbols, to_symbols): 160 data = data.replace(from_, to_) 161 return data 162 163class _Constants(object): 164 pass 165 166nums = string.digits 167hexnums = nums + "ABCDEFabcdef" 168alphanums = alphas + nums 169_bslash = chr(92) 170printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) 171 172class ParseBaseException(Exception): 173 """base exception class for all parsing runtime exceptions""" 174 # Performance tuning: we construct a *lot* of these, so keep this 175 # constructor as small and fast as possible 176 def __init__( self, pstr, loc=0, msg=None, elem=None ): 177 self.loc = loc 178 if msg is None: 179 self.msg = pstr 180 self.pstr = "" 181 else: 182 self.msg = msg 183 self.pstr = pstr 184 self.parserElement = elem 185 186 def __getattr__( self, aname ): 187 """supported attributes by name are: 188 - lineno - 
returns the line number of the exception text 189 - col - returns the column number of the exception text 190 - line - returns the line containing the exception text 191 """ 192 if( aname == "lineno" ): 193 return lineno( self.loc, self.pstr ) 194 elif( aname in ("col", "column") ): 195 return col( self.loc, self.pstr ) 196 elif( aname == "line" ): 197 return line( self.loc, self.pstr ) 198 else: 199 raise AttributeError(aname) 200 201 def __str__( self ): 202 return "%s (at char %d), (line:%d, col:%d)" % \ 203 ( self.msg, self.loc, self.lineno, self.column ) 204 def __repr__( self ): 205 return _ustr(self) 206 def markInputline( self, markerString = ">!<" ): 207 """Extracts the exception line from the input string, and marks 208 the location of the exception with a special symbol. 209 """ 210 line_str = self.line 211 line_column = self.column - 1 212 if markerString: 213 line_str = "".join( [line_str[:line_column], 214 markerString, line_str[line_column:]]) 215 return line_str.strip() 216 def __dir__(self): 217 return "loc msg pstr parserElement lineno col line " \ 218 "markInputLine __str__ __repr__".split() 219 220class ParseException(ParseBaseException): 221 """exception thrown when parse expressions don't match class; 222 supported attributes by name are: 223 - lineno - returns the line number of the exception text 224 - col - returns the column number of the exception text 225 - line - returns the line containing the exception text 226 """ 227 pass 228 229class ParseFatalException(ParseBaseException): 230 """user-throwable exception thrown when inconsistent parse content 231 is found; stops all parsing immediately""" 232 pass 233 234class ParseSyntaxException(ParseFatalException): 235 """just like C{ParseFatalException}, but thrown internally when an 236 C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because 237 an unbacktrackable syntax error has been found""" 238 def __init__(self, pe): 239 super(ParseSyntaxException, 
self).__init__( 240 pe.pstr, pe.loc, pe.msg, pe.parserElement) 241 242#~ class ReparseException(ParseBaseException): 243 #~ """Experimental class - parse actions can raise this exception to cause 244 #~ pyparsing to reparse the input string: 245 #~ - with a modified input string, and/or 246 #~ - with a modified start location 247 #~ Set the values of the ReparseException in the constructor, and raise the 248 #~ exception in a parse action to cause pyparsing to use the new string/location. 249 #~ Setting the values as None causes no change to be made. 250 #~ """ 251 #~ def __init_( self, newstring, restartLoc ): 252 #~ self.newParseText = newstring 253 #~ self.reparseLoc = restartLoc 254 255class RecursiveGrammarException(Exception): 256 """exception thrown by C{validate()} if the grammar could be improperly recursive""" 257 def __init__( self, parseElementList ): 258 self.parseElementTrace = parseElementList 259 260 def __str__( self ): 261 return "RecursiveGrammarException: %s" % self.parseElementTrace 262 263class _ParseResultsWithOffset(object): 264 def __init__(self,p1,p2): 265 self.tup = (p1,p2) 266 def __getitem__(self,i): 267 return self.tup[i] 268 def __repr__(self): 269 return repr(self.tup) 270 def setOffset(self,i): 271 self.tup = (self.tup[0],i) 272 273class ParseResults(object): 274 """Structured parse results, to provide multiple means of access to the parsed data: 275 - as a list (C{len(results)}) 276 - by list index (C{results[0], results[1]}, etc.) 
277 - by attribute (C{results.<resultsName>}) 278 """ 279 #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) 280 def __new__(cls, toklist, name=None, asList=True, modal=True ): 281 if isinstance(toklist, cls): 282 return toklist 283 retobj = object.__new__(cls) 284 retobj.__doinit = True 285 return retobj 286 287 # Performance tuning: we construct a *lot* of these, so keep this 288 # constructor as small and fast as possible 289 def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ): 290 if self.__doinit: 291 self.__doinit = False 292 self.__name = None 293 self.__parent = None 294 self.__accumNames = {} 295 if isinstance(toklist, list): 296 self.__toklist = toklist[:] 297 else: 298 self.__toklist = [toklist] 299 self.__tokdict = dict() 300 301 if name is not None and name: 302 if not modal: 303 self.__accumNames[name] = 0 304 if isinstance(name,int): 305 name = _ustr(name) # will always return a str, but use _ustr for consistency 306 self.__name = name 307 if not toklist in (None,'',[]): 308 if isinstance(toklist,str): 309 toklist = [ toklist ] 310 if asList: 311 if isinstance(toklist,ParseResults): 312 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 313 else: 314 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 315 self[name].__name = name 316 else: 317 try: 318 self[name] = toklist[0] 319 except (KeyError,TypeError,IndexError): 320 self[name] = toklist 321 322 def __getitem__( self, i ): 323 if isinstance( i, (int,slice) ): 324 return self.__toklist[i] 325 else: 326 if i not in self.__accumNames: 327 return self.__tokdict[i][-1][0] 328 else: 329 return ParseResults([ v[0] for v in self.__tokdict[i] ]) 330 331 def __setitem__( self, k, v, isinstance=isinstance ): 332 if isinstance(v,_ParseResultsWithOffset): 333 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 334 sub = v[0] 335 elif isinstance(k,int): 336 self.__toklist[k] = v 337 sub = v 338 
else: 339 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 340 sub = v 341 if isinstance(sub,ParseResults): 342 sub.__parent = wkref(self) 343 344 def __delitem__( self, i ): 345 if isinstance(i,(int,slice)): 346 mylen = len( self.__toklist ) 347 del self.__toklist[i] 348 349 # convert int to slice 350 if isinstance(i, int): 351 if i < 0: 352 i += mylen 353 i = slice(i, i+1) 354 # get removed indices 355 removed = list(range(*i.indices(mylen))) 356 removed.reverse() 357 # fixup indices in token dictionary 358 for name in self.__tokdict: 359 occurrences = self.__tokdict[name] 360 for j in removed: 361 for k, (value, position) in enumerate(occurrences): 362 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 363 else: 364 del self.__tokdict[i] 365 366 def __contains__( self, k ): 367 return k in self.__tokdict 368 369 def __len__( self ): return len( self.__toklist ) 370 def __bool__(self): return len( self.__toklist ) > 0 371 __nonzero__ = __bool__ 372 def __iter__( self ): return iter( self.__toklist ) 373 def __reversed__( self ): return iter( self.__toklist[::-1] ) 374 def keys( self ): 375 """Returns all named result keys.""" 376 return list(self.__tokdict.keys()) 377 378 def pop( self, index=-1 ): 379 """Removes and returns item at specified index (default=last). 
380 Will work with either numeric indices or dict-key indicies.""" 381 ret = self[index] 382 del self[index] 383 return ret 384 385 def get(self, key, defaultValue=None): 386 """Returns named result matching the given key, or if there is no 387 such name, then returns the given C{defaultValue} or C{None} if no 388 C{defaultValue} is specified.""" 389 if key in self: 390 return self[key] 391 else: 392 return defaultValue 393 394 def insert( self, index, insStr ): 395 """Inserts new element at location index in the list of parsed tokens.""" 396 self.__toklist.insert(index, insStr) 397 # fixup indices in token dictionary 398 for name in self.__tokdict: 399 occurrences = self.__tokdict[name] 400 for k, (value, position) in enumerate(occurrences): 401 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 402 403 def items( self ): 404 """Returns all named result keys and values as a list of tuples.""" 405 return [(k,self[k]) for k in self.__tokdict] 406 407 def values( self ): 408 """Returns all named result values.""" 409 return [ v[-1][0] for v in list(self.__tokdict.values()) ] 410 411 def __getattr__( self, name ): 412 if True: #name not in self.__slots__: 413 if name in self.__tokdict: 414 if name not in self.__accumNames: 415 return self.__tokdict[name][-1][0] 416 else: 417 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 418 else: 419 return "" 420 return None 421 422 def __add__( self, other ): 423 ret = self.copy() 424 ret += other 425 return ret 426 427 def __iadd__( self, other ): 428 if other.__tokdict: 429 offset = len(self.__toklist) 430 addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) 431 otheritems = list(other.__tokdict.items()) 432 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 433 for (k,vlist) in otheritems for v in vlist] 434 for k,v in otherdictitems: 435 self[k] = v 436 if isinstance(v[0],ParseResults): 437 v[0].__parent = wkref(self) 438 439 self.__toklist += other.__toklist 440 
self.__accumNames.update( other.__accumNames ) 441 return self 442 443 def __radd__(self, other): 444 if isinstance(other,int) and other == 0: 445 return self.copy() 446 447 def __repr__( self ): 448 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 449 450 def __str__( self ): 451 out = "[" 452 sep = "" 453 for i in self.__toklist: 454 if isinstance(i, ParseResults): 455 out += sep + _ustr(i) 456 else: 457 out += sep + repr(i) 458 sep = ", " 459 out += "]" 460 return out 461 462 def _asStringList( self, sep='' ): 463 out = [] 464 for item in self.__toklist: 465 if out and sep: 466 out.append(sep) 467 if isinstance( item, ParseResults ): 468 out += item._asStringList() 469 else: 470 out.append( _ustr(item) ) 471 return out 472 473 def asList( self ): 474 """Returns the parse results as a nested list of matching tokens, all converted to strings.""" 475 out = [] 476 for res in self.__toklist: 477 if isinstance(res,ParseResults): 478 out.append( res.asList() ) 479 else: 480 out.append( res ) 481 return out 482 483 def asDict( self ): 484 """Returns the named parse results as dictionary.""" 485 return dict( list(self.items()) ) 486 487 def copy( self ): 488 """Returns a new copy of a C{ParseResults} object.""" 489 ret = ParseResults( self.__toklist ) 490 ret.__tokdict = self.__tokdict.copy() 491 ret.__parent = self.__parent 492 ret.__accumNames.update( self.__accumNames ) 493 ret.__name = self.__name 494 return ret 495 496 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 497 """Returns the parse results as XML. 
Tags are created for tokens and lists that have defined results names.""" 498 nl = "\n" 499 out = [] 500 namedItems = dict( [ (v[1],k) for (k,vlist) in list(self.__tokdict.items()) 501 for v in vlist ] ) 502 nextLevelIndent = indent + " " 503 504 # collapse out indents if formatting is not desired 505 if not formatted: 506 indent = "" 507 nextLevelIndent = "" 508 nl = "" 509 510 selfTag = None 511 if doctag is not None: 512 selfTag = doctag 513 else: 514 if self.__name: 515 selfTag = self.__name 516 517 if not selfTag: 518 if namedItemsOnly: 519 return "" 520 else: 521 selfTag = "ITEM" 522 523 out += [ nl, indent, "<", selfTag, ">" ] 524 525 worklist = self.__toklist 526 for i,res in enumerate(worklist): 527 if isinstance(res,ParseResults): 528 if i in namedItems: 529 out += [ res.asXML(namedItems[i], 530 namedItemsOnly and doctag is None, 531 nextLevelIndent, 532 formatted)] 533 else: 534 out += [ res.asXML(None, 535 namedItemsOnly and doctag is None, 536 nextLevelIndent, 537 formatted)] 538 else: 539 # individual token, see if there is a name for it 540 resTag = None 541 if i in namedItems: 542 resTag = namedItems[i] 543 if not resTag: 544 if namedItemsOnly: 545 continue 546 else: 547 resTag = "ITEM" 548 xmlBodyText = _xml_escape(_ustr(res)) 549 out += [ nl, nextLevelIndent, "<", resTag, ">", 550 xmlBodyText, 551 "</", resTag, ">" ] 552 553 out += [ nl, indent, "</", selfTag, ">" ] 554 return "".join(out) 555 556 def __lookup(self,sub): 557 for k,vlist in list(self.__tokdict.items()): 558 for v,loc in vlist: 559 if sub is v: 560 return k 561 return None 562 563 def getName(self): 564 """Returns the results name for this token expression.""" 565 if self.__name: 566 return self.__name 567 elif self.__parent: 568 par = self.__parent() 569 if par: 570 return par.__lookup(self) 571 else: 572 return None 573 elif (len(self) == 1 and 574 len(self.__tokdict) == 1 and 575 list(self.__tokdict.values())[0][0][1] in (0,-1)): 576 return list(self.__tokdict.keys())[0] 577 
else: 578 return None 579 580 def dump(self,indent='',depth=0): 581 """Diagnostic method for listing out the contents of a C{ParseResults}. 582 Accepts an optional C{indent} argument so that this string can be embedded 583 in a nested display of other data.""" 584 out = [] 585 out.append( indent+_ustr(self.asList()) ) 586 keys = list(self.items()) 587 keys.sort() 588 for k,v in keys: 589 if out: 590 out.append('\n') 591 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 592 if isinstance(v,ParseResults): 593 if list(v.keys()): 594 out.append( v.dump(indent,depth+1) ) 595 else: 596 out.append(_ustr(v)) 597 else: 598 out.append(_ustr(v)) 599 return "".join(out) 600 601 # add support for pickle protocol 602 def __getstate__(self): 603 return ( self.__toklist, 604 ( self.__tokdict.copy(), 605 self.__parent is not None and self.__parent() or None, 606 self.__accumNames, 607 self.__name ) ) 608 609 def __setstate__(self,state): 610 self.__toklist = state[0] 611 (self.__tokdict, 612 par, 613 inAccumNames, 614 self.__name) = state[1] 615 self.__accumNames = {} 616 self.__accumNames.update(inAccumNames) 617 if par is not None: 618 self.__parent = wkref(par) 619 else: 620 self.__parent = None 621 622 def __dir__(self): 623 return dir(super(ParseResults,self)) + list(self.keys()) 624 625def col (loc,strg): 626 """Returns current column within a string, counting newlines as line separators. 627 The first column is number 1. 628 629 Note: the default parsing behavior is to expand tabs in the input string 630 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 631 on parsing strings containing <TAB>s, and suggested methods to maintain a 632 consistent view of the parsed string, the parse location, and line and column 633 positions within the parsed string. 
634 """ 635 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) 636 637def lineno(loc,strg): 638 """Returns current line number within a string, counting newlines as line separators. 639 The first line is number 1. 640 641 Note: the default parsing behavior is to expand tabs in the input string 642 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 643 on parsing strings containing <TAB>s, and suggested methods to maintain a 644 consistent view of the parsed string, the parse location, and line and column 645 positions within the parsed string. 646 """ 647 return strg.count("\n",0,loc) + 1 648 649def line( loc, strg ): 650 """Returns the line of text containing loc within a string, counting newlines as line separators. 651 """ 652 lastCR = strg.rfind("\n", 0, loc) 653 nextCR = strg.find("\n", loc) 654 if nextCR >= 0: 655 return strg[lastCR+1:nextCR] 656 else: 657 return strg[lastCR+1:] 658 659def _defaultStartDebugAction( instring, loc, expr ): 660 print(("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) 661 662def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): 663 print(("Matched " + _ustr(expr) + " -> " + str(toks.asList()))) 664 665def _defaultExceptionDebugAction( instring, loc, expr, exc ): 666 print(("Exception raised:" + _ustr(exc))) 667 668def nullDebugAction(*args): 669 """'Do-nothing' debug action, to suppress debugging output during parsing.""" 670 pass 671 672'decorator to trim function calls to match the arity of the target' 673if not _PY3K: 674 def _trim_arity(func, maxargs=2): 675 limit = [0] 676 def wrapper(*args): 677 while 1: 678 try: 679 return func(*args[limit[0]:]) 680 except TypeError: 681 if limit[0] <= maxargs: 682 limit[0] += 1 683 continue 684 raise 685 return wrapper 686else: 687 def _trim_arity(func, maxargs=2): 688 limit = maxargs 689 def wrapper(*args): 
690 #~ nonlocal limit 691 while 1: 692 try: 693 return func(*args[limit:]) 694 except TypeError: 695 if limit: 696 limit -= 1 697 continue 698 raise 699 return wrapper 700 701class ParserElement(object): 702 """Abstract base level parser element class.""" 703 DEFAULT_WHITE_CHARS = " \n\t\r" 704 verbose_stacktrace = False 705 706 def setDefaultWhitespaceChars( chars ): 707 """Overrides the default whitespace chars 708 """ 709 ParserElement.DEFAULT_WHITE_CHARS = chars 710 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 711 712 def __init__( self, savelist=False ): 713 self.parseAction = list() 714 self.failAction = None 715 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 716 self.strRepr = None 717 self.resultsName = None 718 self.saveAsList = savelist 719 self.skipWhitespace = True 720 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 721 self.copyDefaultWhiteChars = True 722 self.mayReturnEmpty = False # used when checking for left-recursion 723 self.keepTabs = False 724 self.ignoreExprs = list() 725 self.debug = False 726 self.streamlined = False 727 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 728 self.errmsg = "" 729 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 730 self.debugActions = ( None, None, None ) #custom debug actions 731 self.re = None 732 self.callPreparse = True # used to avoid redundant calls to preParse 733 self.callDuringTry = False 734 735 def copy( self ): 736 """Make a copy of this C{ParserElement}. 
Useful for defining different parse actions 737 for the same parsing pattern, using copies of the original parse element.""" 738 cpy = copy.copy( self ) 739 cpy.parseAction = self.parseAction[:] 740 cpy.ignoreExprs = self.ignoreExprs[:] 741 if self.copyDefaultWhiteChars: 742 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 743 return cpy 744 745 def setName( self, name ): 746 """Define name for this expression, for use in debugging.""" 747 self.name = name 748 self.errmsg = "Expected " + self.name 749 if hasattr(self,"exception"): 750 self.exception.msg = self.errmsg 751 return self 752 753 def setResultsName( self, name, listAllMatches=False ): 754 """Define name for referencing matching tokens as a nested attribute 755 of the returned parse results. 756 NOTE: this returns a *copy* of the original C{ParserElement} object; 757 this is so that the client can define a basic element, such as an 758 integer, and reference it in multiple places with different names. 759 760 You can also set results names using the abbreviated syntax, 761 C{expr("name")} in place of C{expr.setResultsName("name")} - 762 see L{I{__call__}<__call__>}. 763 """ 764 newself = self.copy() 765 if name.endswith("*"): 766 name = name[:-1] 767 listAllMatches=True 768 newself.resultsName = name 769 newself.modalResults = not listAllMatches 770 return newself 771 772 def setBreak(self,breakFlag = True): 773 """Method to invoke the Python pdb debugger when this element is 774 about to be parsed. Set C{breakFlag} to True to enable, False to 775 disable. 
776 """ 777 if breakFlag: 778 _parseMethod = self._parse 779 def breaker(instring, loc, doActions=True, callPreParse=True): 780 import pdb 781 pdb.set_trace() 782 return _parseMethod( instring, loc, doActions, callPreParse ) 783 breaker._originalParseMethod = _parseMethod 784 self._parse = breaker 785 else: 786 if hasattr(self._parse,"_originalParseMethod"): 787 self._parse = self._parse._originalParseMethod 788 return self 789 790 def setParseAction( self, *fns, **kwargs ): 791 """Define action to perform when successfully matching parse element definition. 792 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 793 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 794 - s = the original string being parsed (see note below) 795 - loc = the location of the matching substring 796 - toks = a list of the matched tokens, packaged as a ParseResults object 797 If the functions in fns modify the tokens, they can return them as the return 798 value from fn, and the modified list of tokens will replace the original. 799 Otherwise, fn does not need to return any value. 800 801 Note: the default parsing behavior is to expand tabs in the input string 802 before starting the parsing process. See L{I{parseString}<parseString>} for more information 803 on parsing strings containing <TAB>s, and suggested methods to maintain a 804 consistent view of the parsed string, the parse location, and line and column 805 positions within the parsed string. 806 """ 807 self.parseAction = list(map(_trim_arity, list(fns))) 808 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 809 return self 810 811 def addParseAction( self, *fns, **kwargs ): 812 """Add parse action to expression's list of parse actions. 
See L{I{setParseAction}<setParseAction>}.""" 813 self.parseAction += list(map(_trim_arity, list(fns))) 814 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 815 return self 816 817 def setFailAction( self, fn ): 818 """Define action to perform if parsing fails at this expression. 819 Fail acton fn is a callable function that takes the arguments 820 C{fn(s,loc,expr,err)} where: 821 - s = string being parsed 822 - loc = location where expression match was attempted and failed 823 - expr = the parse expression that failed 824 - err = the exception thrown 825 The function returns no value. It may throw C{ParseFatalException} 826 if it is desired to stop parsing immediately.""" 827 self.failAction = fn 828 return self 829 830 def _skipIgnorables( self, instring, loc ): 831 exprsFound = True 832 while exprsFound: 833 exprsFound = False 834 for e in self.ignoreExprs: 835 try: 836 while 1: 837 loc,dummy = e._parse( instring, loc ) 838 exprsFound = True 839 except ParseException: 840 pass 841 return loc 842 843 def preParse( self, instring, loc ): 844 if self.ignoreExprs: 845 loc = self._skipIgnorables( instring, loc ) 846 847 if self.skipWhitespace: 848 wt = self.whiteChars 849 instrlen = len(instring) 850 while loc < instrlen and instring[loc] in wt: 851 loc += 1 852 853 return loc 854 855 def parseImpl( self, instring, loc, doActions=True ): 856 return loc, [] 857 858 def postParse( self, instring, loc, tokenlist ): 859 return tokenlist 860 861 #~ @profile 862 def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): 863 debugging = ( self.debug ) #and doActions ) 864 865 if debugging or self.failAction: 866 #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 867 if (self.debugActions[0] ): 868 self.debugActions[0]( instring, loc, self ) 869 if callPreParse and self.callPreparse: 870 preloc = self.preParse( instring, loc ) 871 else: 872 preloc = loc 873 
tokensStart = preloc 874 try: 875 try: 876 loc,tokens = self.parseImpl( instring, preloc, doActions ) 877 except IndexError: 878 raise ParseException( instring, len(instring), self.errmsg, self ) 879 except ParseBaseException: 880 #~ print ("Exception raised:", err) 881 err = None 882 if self.debugActions[2]: 883 err = sys.exc_info()[1] 884 self.debugActions[2]( instring, tokensStart, self, err ) 885 if self.failAction: 886 if err is None: 887 err = sys.exc_info()[1] 888 self.failAction( instring, tokensStart, self, err ) 889 raise 890 else: 891 if callPreParse and self.callPreparse: 892 preloc = self.preParse( instring, loc ) 893 else: 894 preloc = loc 895 tokensStart = preloc 896 if self.mayIndexError or loc >= len(instring): 897 try: 898 loc,tokens = self.parseImpl( instring, preloc, doActions ) 899 except IndexError: 900 raise ParseException( instring, len(instring), self.errmsg, self ) 901 else: 902 loc,tokens = self.parseImpl( instring, preloc, doActions ) 903 904 tokens = self.postParse( instring, loc, tokens ) 905 906 retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 907 if self.parseAction and (doActions or self.callDuringTry): 908 if debugging: 909 try: 910 for fn in self.parseAction: 911 tokens = fn( instring, tokensStart, retTokens ) 912 if tokens is not None: 913 retTokens = ParseResults( tokens, 914 self.resultsName, 915 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 916 modal=self.modalResults ) 917 except ParseBaseException: 918 #~ print "Exception raised in user parse action:", err 919 if (self.debugActions[2] ): 920 err = sys.exc_info()[1] 921 self.debugActions[2]( instring, tokensStart, self, err ) 922 raise 923 else: 924 for fn in self.parseAction: 925 tokens = fn( instring, tokensStart, retTokens ) 926 if tokens is not None: 927 retTokens = ParseResults( tokens, 928 self.resultsName, 929 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 930 
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for C{_parseNoCache}.

    Backtracking repeatedly calls this with identical arguments, so the
    outcome of each call is remembered in C{ParserElement._exprArgCache},
    keyed on C{(self, instring, loc, callPreParse, doActions)}.  Both
    successful results and C{ParseBaseException}s are stored; a cache hit
    returns a copy of the stored tokens or re-raises the stored exception.
    """
    cache = ParserElement._exprArgCache
    key = (self, instring, loc, callPreParse, doActions)

    if key not in cache:
        try:
            outcome = self._parseNoCache(instring, loc, doActions, callPreParse)
            # store a copy so later changes to the returned tokens don't leak into the cache
            cache[key] = (outcome[0], outcome[1].copy())
            return outcome
        except ParseBaseException:
            cache[key] = sys.exc_info()[1]
            raise

    cached = cache[key]
    if isinstance(cached, Exception):
        raise cached
    return (cached[0], cached[1].copy())
def parseString(self, instring, parseAll=False):
    """Run this parse expression against C{instring}, starting at location 0,
    and return the matched tokens.  This is the main entry point for client
    code once the complete expression has been built.

    Set C{parseAll} to True to require that the whole input string is
    consumed (equivalent to ending the grammar with C{StringEnd()}).

    Note: unless C{parseWithTabs} has been called, C{parseString} calls
    C{expandtabs()} on the input string so that column numbers reported to
    parse actions are correct.  If the input contains tabs and your parse
    actions use the C{loc} argument to index into the string being parsed,
    keep a consistent view of the input by doing one of the following:
     - call C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action with the full C{(s,loc,toks)} signature and
       index into the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    text = instring if self.keepTabs else instring.expandtabs()
    try:
        endLoc, results = self._parse(text, 0)
        if parseAll:
            endLoc = self.preParse(text, endLoc)
            endOfInput = Empty() + StringEnd()
            endOfInput._parse(text, endLoc)
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            failure = sys.exc_info()[1]
            raise failure
        raise
    return results
def transformString(self, instring):
    """Extension to C{scanString} that rewrites the input text.  Attach a
    parse action that modifies the returned token list, then call
    C{transformString()} on a target string: every match found by
    C{scanString} is replaced by its (possibly modified) tokens, and the
    text between matches is copied through unchanged.
    C{transformString()} returns the resulting transformed string."""
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    pieces = []
    resumeAt = 0
    try:
        for toks, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[resumeAt:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            resumeAt = end
        pieces.append(instring[resumeAt:])
        kept = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(kept)))
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            failure = sys.exc_info()[1]
            raise failure
        raise
def __mul__(self, other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
       to C{expr*n + ZeroOrMore(expr)}
       (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
       (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
     - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for a negative count, a maximum below
    the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # apply the format with % - passing the types as extra exception
            # arguments would leave the '%s' placeholders unfilled
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % type(other))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: Optional(expr + Optional(expr + ...)), n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
def __and__(self, other):
    """Implementation of & operator - returns C{Each}.

    A string operand is first wrapped in a C{Literal}.  Any other operand
    that is not a C{ParserElement} produces a C{SyntaxWarning} and the
    result is C{None}."""
    operand = Literal(other) if isinstance(other, str) else other
    if isinstance(operand, ParserElement):
        return Each([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
def ignore(self, other):
    """Register an expression (e.g., a comment pattern) to be skipped while
    matching.  May be called repeatedly to register several ignorable
    patterns.  Expressions that are not already a C{Suppress} are wrapped
    in one; a C{Suppress} that is already registered is not added again.
    Returns C{self}.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other.copy())
    return self
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

    C{file_or_filename} may be any object with a C{read()} method, or a
    filename.  If a filename is specified (instead of a file object),
    the file is opened in text mode, read in full, and closed before
    parsing.  C{parseAll} is passed through to C{parseString}, whose
    result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename.  Text mode
        # gives parseString a str rather than bytes, and the with-block
        # closes the file even if read() raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
class NoMatch(Token):
    """A token that can never be matched; every parse attempt fails."""
    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail, reporting the attempted location and input
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must
       not be immediately preceded or followed by a keyword character.
       Compare with C{Literal}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    # NOTE(review): the identChars default is bound when the class body is
    # executed, so a later setDefaultKeywordChars() call does not change the
    # default seen by this constructor - confirm that is intended.
    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty keyword only warns; firstMatchChar is left unset in that case
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text, so upper-case both
            # the keyword and the identifier characters once, up front
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}; it must not be directly preceded or
        followed by a character in C{identChars}.  Returns the location
        after the keyword and the keyword string as given to the
        constructor; raises the element's cached exception otherwise."""
        if self.caseless:
            # conditions, in order: text matches ignoring case; keyword runs to
            # the end of input or is followed by a non-identifier character;
            # keyword is at the start of input or preceded by a non-identifier character
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            # the first-character test short-circuits before the more costly
            # startswith(); the boundary tests are the same as in the caseless branch
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        # no match: update and raise the exception object held in self.myException
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this keyword with C{identChars} reset to the
        current class default."""
        c = super(Keyword,self).copy()
        # NOTE(review): this replaces the copy's identChars *set* with the
        # default *string*, dropping any custom identChars passed to the
        # constructor (and the upper-casing done for caseless keywords) -
        # confirm this is intended.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessKeyword(Keyword):
    """Caseless version of C{Keyword}: matches the keyword string ignoring
       the case of letters, and only where it is not immediately preceded
       or followed by a character in C{identChars}.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc} ignoring case.  Returns the location
        after the keyword and the keyword string as given to the
        constructor; raises the element's cached exception otherwise."""
        # same three conditions as the caseless branch of Keyword.parseImpl,
        # including the preceding-character test, so that the keyword is not
        # matched in the middle of a longer identifier (e.g. 'if' in 'elif')
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        # no match: update and raise the exception object held in self.myException
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
An optional 1626 C{exclude} parameter can list characters that might be found in 1627 the input C{bodyChars} string; useful to define a word of all printables 1628 except for one or two characters, for instance. 1629 """ 1630 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): 1631 super(Word,self).__init__() 1632 if excludeChars: 1633 initChars = ''.join([c for c in initChars if c not in excludeChars]) 1634 if bodyChars: 1635 bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) 1636 self.initCharsOrig = initChars 1637 self.initChars = set(initChars) 1638 if bodyChars : 1639 self.bodyCharsOrig = bodyChars 1640 self.bodyChars = set(bodyChars) 1641 else: 1642 self.bodyCharsOrig = initChars 1643 self.bodyChars = set(initChars) 1644 1645 self.maxSpecified = max > 0 1646 1647 if min < 1: 1648 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 1649 1650 self.minLen = min 1651 1652 if max > 0: 1653 self.maxLen = max 1654 else: 1655 self.maxLen = _MAX_INT 1656 1657 if exact > 0: 1658 self.maxLen = exact 1659 self.minLen = exact 1660 1661 self.name = _ustr(self) 1662 self.errmsg = "Expected " + self.name 1663 self.mayIndexError = False 1664 self.asKeyword = asKeyword 1665 1666 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 1667 if self.bodyCharsOrig == self.initCharsOrig: 1668 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 1669 elif len(self.bodyCharsOrig) == 1: 1670 self.reString = "%s[%s]*" % \ 1671 (re.escape(self.initCharsOrig), 1672 _escapeRegexRangeChars(self.bodyCharsOrig),) 1673 else: 1674 self.reString = "[%s][%s]*" % \ 1675 (_escapeRegexRangeChars(self.initCharsOrig), 1676 _escapeRegexRangeChars(self.bodyCharsOrig),) 1677 if self.asKeyword: 1678 self.reString = r"\b"+self.reString+r"\b" 1679 try: 1680 self.re = re.compile( self.reString ) 1681 except: 1682 self.re = None 
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       A compiled regular expression object may be given instead of a string.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        If C{pattern} is an already-compiled regular expression it is used
        directly.  Raises C{ValueError} if C{pattern} is neither a string
        nor a compiled RE object; an invalid pattern string is reported
        with a C{SyntaxWarning} and the C{re} error is re-raised."""
        super(Regex,self).__init__()

        if isinstance(pattern, str):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the regex source text; str() of a compiled pattern is
            # only the object's repr, not the expression itself
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regular expression at C{loc}.  Returns the end
        location and a C{ParseResults} holding the matched text, with each
        named group of the match also stored under its group name; raises
        the element's cached exception if there is no match."""
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = ParseResults(result.group())
        for groupName, groupValue in result.groupdict().items():
            ret[groupName] = groupValue
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            # no name available yet: fall back to a description built from the pattern
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
are delimited by quoting characters. 1821 """ 1822 def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): 1823 """ 1824 Defined with the following parameters: 1825 - quoteChar - string of one or more characters defining the quote delimiting string 1826 - escChar - character to escape quotes, typically backslash (default=None) 1827 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 1828 - multiline - boolean indicating whether quotes can span multiple lines (default=False) 1829 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) 1830 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) 1831 """ 1832 super(QuotedString,self).__init__() 1833 1834 # remove white space from quote chars - wont work anyway 1835 quoteChar = quoteChar.strip() 1836 if len(quoteChar) == 0: 1837 warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 1838 raise SyntaxError() 1839 1840 if endQuoteChar is None: 1841 endQuoteChar = quoteChar 1842 else: 1843 endQuoteChar = endQuoteChar.strip() 1844 if len(endQuoteChar) == 0: 1845 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 1846 raise SyntaxError() 1847 1848 self.quoteChar = quoteChar 1849 self.quoteCharLen = len(quoteChar) 1850 self.firstQuoteChar = quoteChar[0] 1851 self.endQuoteChar = endQuoteChar 1852 self.endQuoteCharLen = len(endQuoteChar) 1853 self.escChar = escChar 1854 self.escQuote = escQuote 1855 self.unquoteResults = unquoteResults 1856 1857 if multiline: 1858 self.flags = re.MULTILINE | re.DOTALL 1859 self.pattern = r'%s(?:[^%s%s]' % \ 1860 ( re.escape(self.quoteChar), 1861 _escapeRegexRangeChars(self.endQuoteChar[0]), 1862 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 1863 else: 1864 
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at C{loc}; return C{(end_loc, text)}.

    The character at C{loc} is compared with C{self.firstQuoteChar} before the
    compiled pattern C{self.re} is tried.  If either check fails,
    C{self.myException} is stamped with C{loc} and the input string and
    raised.

    When C{self.unquoteResults} is set, the opening and closing quote
    sequences are sliced off, C{self.escCharReplacePattern} is used to drop
    the escape character in front of escaped characters (if C{self.escChar}
    is set), and each C{self.escQuote} sequence is replaced by
    C{self.endQuoteChar} (if C{self.escQuote} is set).  Otherwise the matched
    text is returned with its quotes.  C{doActions} is not used here.
    """
    result = None
    # cheap first-character test before running the regular expression
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match( instring, loc )
    if not result:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,str):
            # replace escaped characters; the replacement template must be a
            # raw string, since "\g" is not a valid string escape sequence
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of characters that are not in C{self.notChars}.

    The character at C{loc} must not be an excluded character.  Scanning then
    continues until an excluded character, the end of the input, or
    C{self.maxLen} characters have been taken.  Returns C{(end_loc, text)}.

    If the first character is excluded, or fewer than C{self.minLen}
    characters were taken, C{self.myException} is stamped with the current
    location and the input string and raised.  C{doActions} is not used here.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    begin = loc
    limit = min( begin + self.maxLen, len(instring) )
    loc = begin + 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - begin >= self.minLen:
        return loc, instring[begin:loc]

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        """Remember C{colno} as the target column."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column.

        Returns C{loc} unchanged when it is already at C{self.col}.  Otherwise
        any C{ignoreExprs} are skipped first, then C{loc} is advanced over
        whitespace until the column matches, a non-space character is found,
        or the input ends.
        """
        if col( loc, instring ) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        textLen = len(instring)
        while loc < textLen:
            if not instring[loc].isspace():
                break
            if col( loc, instring ) == self.col:
                break
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column.

        Raises C{ParseException} when C{loc} is already past C{self.col}.
        The new location is C{loc} moved forward by the column difference.
        """
        here = col( loc, instring )
        if here > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - here
        return target, instring[ loc: target ]
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at C{loc}, or the end of the input.

        Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} when C{loc}
        is exactly the end of the string.  Any other position, including one
        past the end of the string, raises C{self.myException} stamped with
        C{loc} and the input string.
        """
        textLen = len(instring)
        if loc == textLen:
            return loc+1, []
        if loc < textLen and instring[loc] == "\n":
            return loc+1, "\n"

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.errmsg = "Not at the start of a word"
        # a set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list when a word starts at C{loc}.

        Position 0 always matches.  Elsewhere the previous character must not
        be a word character and the character at C{loc} must be one; otherwise
        C{self.myException} is stamped with C{loc} and the input string and
        raised.
        """
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed with an empty token list when a word ends at C{loc}.

    A position at or past the end of the input (or any position in an empty
    input) always matches.  Otherwise the character at C{loc} must not be in
    C{self.wordChars} and the character before C{loc} must be in it.  When
    the check fails, C{self.myException} is stamped with C{loc} and the input
    string and raised.  C{doActions} is not used here.
    """
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        # At loc == 0 there is no previous character, so no word can end
        # here.  Indexing instring[loc-1] would wrap around to the last
        # character of the input and make the result depend on how the
        # string happens to end.
        if (instring[loc] in self.wordChars or
            loc == 0 or
            instring[loc-1] not in self.wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
    return loc, []
def validate( self, validateTrace=None ):
    """Validate every contained expression, then check this one for recursion.

    C{validateTrace} is the list of expressions visited so far; it is copied,
    not modified.  Each contained expression is validated with that trace
    extended by C{self}, and finally C{self.checkRecursion([])} is called.
    """
    # None instead of a shared mutable default list
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion( [] )
def __init__( self, exprs, savelist = True ):
    """Build a sequence expression from C{exprs}.

    C{ParseExpression.__init__} normalises C{exprs} (a list, a single string,
    any other iterable, or a single expression) into the list C{self.exprs}.
    The sequence may return empty only if every contained expression may, and
    it takes its whitespace settings from the first contained expression.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all( e.mayReturnEmpty for e in self.exprs )
    # Read the normalised self.exprs, not the raw argument: the raw argument
    # may be a string or an iterator, which cannot be indexed for an
    # expression.  An empty sequence keeps the inherited whitespace settings.
    if self.exprs:
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
def parseImpl( self, instring, loc, doActions=True ):
    """Parse with the alternative that matches the longest string.

    Every alternative is first tried with C{tryParse}; the one reaching the
    furthest end location wins, with ties going to the earliest listed.  The
    winner is then parsed for real with C{_parse}.

    If nothing matches, the failure that got furthest into the input is
    raised.  An C{IndexError} from an alternative counts as a failure at the
    end of the input.  With no recorded failure at all, a C{ParseException}
    saying there are no defined alternatives is raised.
    """
    furthestFailLoc = -1
    furthestFail = None
    bestEnd = -1
    for alt in self.exprs:
        try:
            endLoc = alt.tryParse( instring, loc )
        except ParseException as err:
            if err.loc > furthestFailLoc:
                furthestFail = err
                furthestFailLoc = err.loc
        except IndexError:
            textLen = len(instring)
            if textLen > furthestFailLoc:
                furthestFail = ParseException(instring,textLen,alt.errmsg,self)
                furthestFailLoc = textLen
        else:
            if endLoc > bestEnd:
                bestEnd = endLoc
                winner = alt

    if bestEnd >= 0:
        return winner._parse( instring, loc, doActions )

    if furthestFail is not None:
        raise furthestFail
    raise ParseException(instring, loc, "no defined alternatives to match", self)
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may,
        or if there are no alternatives at all."""
        super(MatchFirst,self).__init__(exprs, savelist)
        # Test the normalised self.exprs rather than the raw argument: an
        # iterator argument is always truthy, even when it yields nothing.
        if self.exprs:
            self.mayReturnEmpty = any( e.mayReturnEmpty for e in self.exprs )
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at C{loc}.

        If none matches, the failure that got furthest into the input is
        raised.  An C{IndexError} from an alternative counts as a failure at
        the end of the input.  With no recorded failure at all, a
        C{ParseException} saying there are no defined alternatives is raised.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Append C{other} (a string becomes a C{Literal}) as a further alternative."""
        if isinstance( other, str ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if there is one, else C{{a | b | ...}}."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative, with C{self} added to the trail."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
def __init__( self, exprs, savelist = True ):
    """Build an any-order expression from C{exprs}.

    The expression may return empty only if every contained expression may.
    Whitespace skipping is switched on, and C{initExprGroups} is set so that
    C{parseImpl} sorts the expressions into groups on its first call.
    """
    super(Each,self).__init__(exprs, savelist)
    canBeEmpty = True
    for contained in self.exprs:
        if contained.mayReturnEmpty:
            continue
        canBeEmpty = False
        break
    self.mayReturnEmpty = canBeEmpty
    self.skipWhitespace = True
    self.initExprGroups = True
def leaveWhitespace( self ):
    """Turn off whitespace skipping for this element and its contained expression.

    The contained expression is replaced by a copy before being changed, so
    the original expression object is not modified.  Returns C{self}.
    """
    self.skipWhitespace = False
    # Check for a missing expression before copying it; copying first raised
    # AttributeError when self.expr was None.
    if self.expr is not None:
        self.expr = self.expr.copy()
        self.expr.leaveWhitespace()
    return self
def validate( self, validateTrace=None ):
    """Validate the wrapped expression, then run the recursion check on this element.

    C{validateTrace} is the list of elements visited so far by enclosing
    C{validate} calls.  It is copied, never modified; when omitted an empty
    trace is used.  The wrapped expression (if any) is validated with the
    trace extended by C{self}.
    """
    # None sentinel instead of a mutable [] default shared between calls
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    if self.expr is not None:
        self.expr.validate(tmp)
    self.checkRecursion( [] )
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with an empty token list when the wrapped expression does not match at C{loc}.

    The parse location is returned unchanged.  When the wrapped expression
    does match, the exception object held in C{self.myException} is stamped
    with the current location and input string and raised.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        # the lookahead failed, which is the success case for a negative lookahead
        return loc, []
    # the lookahead matched: report failure through the stored exception instance
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""
    def __str__(self):
        return ""
    def __bool__(self):
        return False
    # Python 2 consults __nonzero__ for truth testing; share the one implementation
    __nonzero__ = __bool__
def parseImpl( self, instring, loc, doActions=True ):
    """Try the wrapped expression; on failure succeed anyway without advancing.

    When the wrapped expression raises C{ParseException} or C{IndexError},
    the returned tokens are empty unless a default value was given, in which
    case the default is returned (and also stored under the wrapped
    expression's results name, when it has one).
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            # no default was supplied
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ fallback ])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [ fallback ]
    return loc, tokens
def parseImpl( self, instring, loc, doActions=True ):
    """Advance one character at a time until the target expression matches.

    At each candidate location: if C{failOn} is set and matches, a
    C{ParseException} is raised and propagated; any C{ignoreExpr} matches are
    stepped over; then the target is tried with parse actions disabled.
    On success the text between the starting location and the match is
    returned as a single token.  With C{includeMatch} set, the target is
    parsed again (honouring C{doActions}) and its tokens, if any, are
    appended to the skipped text inside one C{ParseResults}.

    If the end of the string is passed without a match, the exception held
    in C{self.myException} is raised.
    """
    begin = loc
    end = len(instring)
    target = self.expr
    sawFailOn = False
    while loc <= end:
        try:
            if self.failOn:
                try:
                    self.failOn.tryParse(instring, loc)
                except ParseBaseException:
                    pass
                else:
                    # flag tells the outer handler to re-raise instead of advancing
                    sawFailOn = True
                    raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                sawFailOn = False
            if self.ignoreExpr is not None:
                # consume consecutive ignorable matches
                while True:
                    try:
                        loc = self.ignoreExpr.tryParse(instring,loc)
                    except ParseBaseException:
                        break
            # probe only: parse actions are switched off for this attempt
            target._parse( instring, loc, doActions=False, callPreParse=False )
            skipped = instring[begin:loc]
            if not self.includeMatch:
                return loc, [ skipped ]
            loc,matched = target._parse(instring,loc,doActions,callPreParse=False)
            if not matched:
                return loc, [ skipped ]
            combined = ParseResults( skipped )
            combined += matched
            return loc, [ combined ]
        except (ParseException,IndexError):
            if sawFailOn:
                raise
            loc += 1
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
def streamline( self ):
    """Streamline the wrapped expression the first time this is called.

    The C{streamlined} flag is set before the wrapped expression is visited;
    once it is set, further calls return immediately.  Returns C{self}.
    """
    if self.streamlined:
        return self
    # flag first, then descend (same order as before) -- presumably so a
    # grammar that refers back to this element does not recurse forever
    self.streamlined = True
    if self.expr is not None:
        self.expr.streamline()
    return self
def postParse( self, instring, loc, tokenlist ):
    """Return a list holding the upper-cased form of every matched token.

    Each token's own C{upper()} method is called.  The module-level
    C{string.upper} function is not available in Python 3's C{string}
    module, so it cannot be used here.
    """
    return [ tok.upper() for tok in tokenlist ]
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with one string made by concatenating them.

    A copy of C{tokenlist} is emptied and then given the single combined
    token.  If this element has a results name and the copy still reports
    any keys, the copy is returned wrapped in a list; otherwise the copy
    itself is returned.
    """
    combined = tokenlist.copy()
    del combined[:]
    pieces = tokenlist._asStringList(self.joinString)
    combined += ParseResults([ "".join(pieces) ], modal=self.modalResults)

    if self.resultsName and list(combined.keys()):
        return [ combined ]
    return combined
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    The first successful call runs the wrapped action and returns its
    result; later calls raise C{ParseException} until C{reset()} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        # mark as used only after the action returns without raising
        results = self.callable(s,l,t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a list of C{expr} items separated by C{delim} (default ',').

    With C{combine=False} (the default) the result is C{expr} followed by
    zero or more suppressed-delimiter-plus-C{expr} repetitions, so only the
    items appear in the results.  With C{combine=True} the same sequence,
    delimiters included, is wrapped in C{Combine} and comes back as a single
    token string.  Either way the returned expression is named
    C{"expr [delim expr]..."}.
    """
    dlName = "".join([ _ustr(expr), " [", _ustr(delim), " ", _ustr(expr), "]..." ])
    if not combine:
        repeated = ZeroOrMore( Suppress( delim ) + expr )
        return ( expr + repeated ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
def _flatten(L):
    """Return a new flat list with every nested C{list} in C{L} expanded in place.

    Only C{list} instances are expanded; tuples, strings and other values
    are kept as single items.
    """
    flat = []
    for item in L:
        if not isinstance(item,list):
            flat.append(item)
            continue
        flat += _flatten(item)
    return flat
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{MatchFirst} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       If C{strs} is neither a string nor a list/tuple, a C{SyntaxWarning} is issued
       and the result is a C{MatchFirst} built from an empty list of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,str):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no alternatives; leaving symbols unbound would make
        # the code below fail with UnboundLocalError
        symbols = []

    # drop duplicates and move any symbol ahead of a shorter symbol that is its prefix
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no change was made for position i, so move on
            i += 1

    # with no symbols there is no pattern to build, so go straight to MatchFirst
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is one character long: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized input text matched by C{expr}.

       The start and end locations of the match are recorded by two C{Empty}
       markers placed around C{expr}, and a parse action slices the input
       string between them.

       With C{asString=True} (the default) the parse action returns that slice
       as a string.  With C{asString=False} the existing C{ParseResults} is
       modified in place: its list items are replaced by the single slice and
       the two internal marker names are removed, so any results names defined
       inside C{expr} remain available.  Pass C{asString=False} if you need
       those results names.
    """
    def _markLoc(s,loc,t):
        return loc

    def _sliceAsString(s,l,t):
        return s[t._original_start:t._original_end]

    def _sliceInPlace(s,l,t):
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    startMarker = Empty().setParseAction(_markLoc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")
    wrapped.setParseAction(_sliceAsString if asString else _sliceInPlace)
    return wrapped
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if C{s} cannot be parsed or expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best-effort helper: failures give "", but KeyboardInterrupt and
        # SystemExit are allowed to propagate
        return ""
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks up the call stack, starting two frames above this function, and
       returns the local variable C{loc} of the first frame whose function is
       named C{_parseNoCache}.  Raises C{ParseFatalException} if no such frame
       is found.
    """
    import inspect
    frames = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for record in frames[2:]:
            if record[3] != "_parseNoCache":
                continue
            return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references held by the stack list
        del frames
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.

       Delegates to C{_makeTags} with its C{xml} argument set to C{False}.
    """
    return _makeTags( tagStr, xml=False )
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar
        - opList - list of tuples (lists are also accepted), one for each operator
          precedence level in the expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises C{ValueError} if numTerms is not 1, 2 or 3, if numTerms is 3 and
       opExpr is not a pair of expressions, or if rightLeftAssoc is neither
       opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    # the innermost level is the base expression or a parenthesized full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # tuple() lets a level be written as a list; the (None,) padding supplies
        # the parse action slot when it is omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its operator form, or falls through to the previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3585 3586 Parameters: 3587 - opener - opening character for a nested list (default="("); can also be a pyparsing expression 3588 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression 3589 - content - expression for items within the nested lists (default=None) 3590 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) 3591 3592 If an expression is not provided for the content argument, the nested 3593 expression will capture all whitespace-delimited content between delimiters 3594 as a list of separate values. 3595 3596 Use the C{ignoreExpr} argument to define expressions that may contain 3597 opening or closing characters that should not be treated as opening 3598 or closing characters for nesting, such as quotedString or a comment 3599 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. 3600 The default is L{quotedString}, but if no expressions are to be ignored, 3601 then pass C{None} for this argument. 
3602 """ 3603 if opener == closer: 3604 raise ValueError("opening and closing strings cannot be the same") 3605 if content is None: 3606 if isinstance(opener,str) and isinstance(closer,str): 3607 if len(opener) == 1 and len(closer)==1: 3608 if ignoreExpr is not None: 3609 content = (Combine(OneOrMore(~ignoreExpr + 3610 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 3611 ).setParseAction(lambda t:t[0].strip())) 3612 else: 3613 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS 3614 ).setParseAction(lambda t:t[0].strip())) 3615 else: 3616 if ignoreExpr is not None: 3617 content = (Combine(OneOrMore(~ignoreExpr + 3618 ~Literal(opener) + ~Literal(closer) + 3619 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 3620 ).setParseAction(lambda t:t[0].strip())) 3621 else: 3622 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + 3623 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 3624 ).setParseAction(lambda t:t[0].strip())) 3625 else: 3626 raise ValueError("opening and closing arguments must be strings if no content expression is given") 3627 ret = Forward() 3628 if ignoreExpr is not None: 3629 ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) 3630 else: 3631 ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) 3632 return ret 3633 3634def indentedBlock(blockStatementExpr, indentStack, indent=True): 3635 """Helper method for defining space-delimited indentation blocks, such as 3636 those used to define block statements in Python source code. 
3637 3638 Parameters: 3639 - blockStatementExpr - expression defining syntax of statement that 3640 is repeated within the indented block 3641 - indentStack - list created by caller to manage indentation stack 3642 (multiple statementWithIndentedBlock expressions within a single grammar 3643 should share a common indentStack) 3644 - indent - boolean indicating whether block must be indented beyond the 3645 the current level; set to False for block of left-most statements 3646 (default=True) 3647 3648 A valid block must contain at least one C{blockStatement}. 3649 """ 3650 def checkPeerIndent(s,l,t): 3651 if l >= len(s): return 3652 curCol = col(l,s) 3653 if curCol != indentStack[-1]: 3654 if curCol > indentStack[-1]: 3655 raise ParseFatalException(s,l,"illegal nesting") 3656 raise ParseException(s,l,"not a peer entry") 3657 3658 def checkSubIndent(s,l,t): 3659 curCol = col(l,s) 3660 if curCol > indentStack[-1]: 3661 indentStack.append( curCol ) 3662 else: 3663 raise ParseException(s,l,"not a subentry") 3664 3665 def checkUnindent(s,l,t): 3666 if l >= len(s): return 3667 curCol = col(l,s) 3668 if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): 3669 raise ParseException(s,l,"not an unindent") 3670 indentStack.pop() 3671 3672 NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) 3673 INDENT = Empty() + Empty().setParseAction(checkSubIndent) 3674 PEER = Empty().setParseAction(checkPeerIndent) 3675 UNDENT = Empty().setParseAction(checkUnindent) 3676 if indent: 3677 smExpr = Group( Optional(NL) + 3678 #~ FollowedBy(blockStatementExpr) + 3679 INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) 3680 else: 3681 smExpr = Group( Optional(NL) + 3682 (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) 3683 blockStatementExpr.ignore(_bslash + LineEnd()) 3684 return smExpr 3685 3686alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") 3687punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") 3688 
# generic HTML tag expressions, matching any tag name made of letters, digits, "_" and ":"
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:"))

# the common named HTML entities; the entity name is available as the "entity" results name
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";"
).streamline()
# entity name -> replacement character
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), '><& "'))
# parse action: the replacement character for the matched entity, or None if the name is unknown
replaceHTMLEntity = lambda t: _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# Java comments use the same syntax as C++ comments
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# an unquoted item of a comma-separated list: runs of non-comma printables, which may be
# joined by blanks/tabs as long as the blanks are not followed by a comma or the end of line
_noncomma = "".join(c for c in printables if c != ",")
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")


if __name__ == "__main__":

    def test(teststring):
        """Parse teststring with simpleSQL and print the results, or the error position on failure."""
        try:
            tokens = simpleSQL.parseString(teststring)
            tokenlist = tokens.asList()
            print(teststring + "->" + str(tokenlist))
            print("tokens = " + str(tokens))
            print("tokens.columns = " + str(tokens.columns))
            print("tokens.tables = " + str(tokens.tables))
            print(tokens.asXML("SQL", True))
        except ParseBaseException as err:
            print(teststring + "->")
            print(err.line)
            # caret under the column where the parse failed
            print(" " * (err.column - 1) + "^")
            print(err)
        print()

    # a minimal "select <columns> from <tables>" grammar used to exercise the module
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName))
    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName))
    simpleSQL = (
        selectToken
        + ('*' | columnNameList).setResultsName("columns")
        + fromToken
        + tableNameList.setResultsName("tables")
    )

    # valid statements, followed by deliberately malformed ones to show the error reporting
    for sample in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
    ):
        test(sample)