1# module pyparsing.py 2# 3# Copyright (c) 2003-2016 Paul T. McGuire 4# 5# Permission is hereby granted, free of charge, to any person obtaining 6# a copy of this software and associated documentation files (the 7# "Software"), to deal in the Software without restriction, including 8# without limitation the rights to use, copy, modify, merge, publish, 9# distribute, sublicense, and/or sell copies of the Software, and to 10# permit persons to whom the Software is furnished to do so, subject to 11# the following conditions: 12# 13# The above copyright notice and this permission notice shall be 14# included in all copies or substantial portions of the Software. 15# 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23# 24# flake8: noqa 25 26__doc__ = \ 27""" 28pyparsing module - Classes and methods to define and execute parsing grammars 29 30The pyparsing module is an alternative approach to creating and executing simple grammars, 31vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you 32don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 33provides a library of classes that you use to construct the grammar directly in Python. 34 35Here is a program to parse "Hello, World!" 
(or any greeting of the form 36C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements 37(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to 38L{Literal} expressions):: 39 40 from pyparsing import Word, alphas 41 42 # define grammar of a greeting 43 greet = Word(alphas) + "," + Word(alphas) + "!" 44 45 hello = "Hello, World!" 46 print (hello, "->", greet.parseString(hello)) 47 48The program outputs the following:: 49 50 Hello, World! -> ['Hello', ',', 'World', '!'] 51 52The Python representation of the grammar is quite readable, owing to the self-explanatory 53class names, and the use of '+', '|' and '^' operators. 54 55The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 56object with named attributes. 57 58The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 
60 - quoted strings 61 - embedded comments 62""" 63 64__version__ = "2.2.0" 65__versionTime__ = "06 Mar 2017 02:06 UTC" 66__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 67 68import string 69from weakref import ref as wkref 70import copy 71import sys 72import warnings 73import re 74import sre_constants 75import collections 76import pprint 77import traceback 78import types 79from datetime import datetime 80 81try: 82 from _thread import RLock 83except ImportError: 84 from threading import RLock 85 86try: 87 from collections import OrderedDict as _OrderedDict 88except ImportError: 89 try: 90 from ordereddict import OrderedDict as _OrderedDict 91 except ImportError: 92 _OrderedDict = None 93 94#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 95 96__all__ = [ 97'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 98'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 99'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 100'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 101'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 102'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 103'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 104'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 105'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 106'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 107'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 108'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 109'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 
'operatorPrecedence', 'printables', 110'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 111'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 112'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 113'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 114'CloseMatch', 'tokenMap', 'pyparsing_common', 115] 116 117system_version = tuple(sys.version_info)[:3] 118PY_3 = system_version[0] == 3 119if PY_3: 120 _MAX_INT = sys.maxsize 121 basestring = str 122 unichr = chr 123 _ustr = str 124 125 # build list of single arg builtins, that can be used as parse actions 126 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 127 128else: 129 _MAX_INT = sys.maxint 130 range = xrange 131 132 def _ustr(obj): 133 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 134 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 135 then < returns the unicode object | encodes it with the default encoding | ... >. 136 """ 137 if isinstance(obj,unicode): 138 return obj 139 140 try: 141 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 142 # it won't break any existing code. 
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Attributes stored by the constructor:
     - loc - the location passed to the constructor (default 0)
     - msg - the error message
     - pstr - the parsed string (empty when only a message was given)
     - parserElement - the C{elem} passed to the constructor

    Attributes computed on demand from C{loc} and C{pstr}:
     - lineno - returns the line number of the exception text
     - col (or column) - returns the column number of the exception text
     - line - returns the line containing the exception text
    """

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        # when no msg is given, the first argument is the message itself
        # and there is no input string to report against
        self.msg, self.pstr = (pstr, "") if msg is None else (msg, pstr)
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an exception of this class from the
        C{pstr}, C{loc}, C{msg} and C{parserElement} of another one, so
        that subclasses do not need compatible C{__init__} signatures.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Compute C{lineno}, C{col}/C{column} or C{line} on request.

        Any other missing attribute raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        template = "%s (at char %d), (line:%d, col:%d)"
        return template % (self.msg, self.loc, self.lineno, self.column)

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the line containing the exception, stripped of surrounding
        whitespace, with C{markerString} spliced in at the exception column.
        An empty C{markerString} leaves the line unmarked.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the regular ones
        return ["lineno", "col", "line"] + dir(type(self))
class RecursiveGrammarException(Exception):
    """Exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive.

    The sequence of parse elements passed to the constructor is kept,
    unmodified, in the C{parseElementTrace} attribute.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple on the right-hand side of
        # '%' is unpacked as the format arguments, so a tuple trace whose
        # length is not exactly 1 would raise TypeError instead of printing.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
307 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 308 309 Example:: 310 integer = Word(nums) 311 date_str = (integer.setResultsName("year") + '/' 312 + integer.setResultsName("month") + '/' 313 + integer.setResultsName("day")) 314 # equivalent form: 315 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 316 317 # parseString returns a ParseResults object 318 result = date_str.parseString("1999/12/31") 319 320 def test(s, fn=repr): 321 print("%s -> %s" % (s, fn(eval(s)))) 322 test("list(result)") 323 test("result[0]") 324 test("result['month']") 325 test("result.day") 326 test("'month' in result") 327 test("'minutes' in result") 328 test("result.dump()", str) 329 prints:: 330 list(result) -> ['1999', '/', '12', '/', '31'] 331 result[0] -> '1999' 332 result['month'] -> '12' 333 result.day -> '31' 334 'month' in result -> True 335 'minutes' in result -> False 336 result.dump() -> ['1999', '/', '12', '/', '31'] 337 - day: 31 338 - month: 12 339 - year: 1999 340 """ 341 def __new__(cls, toklist=None, name=None, asList=True, modal=True ): 342 if isinstance(toklist, cls): 343 return toklist 344 retobj = object.__new__(cls) 345 retobj.__doinit = True 346 return retobj 347 348 # Performance tuning: we construct a *lot* of these, so keep this 349 # constructor as small and fast as possible 350 def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ): 351 if self.__doinit: 352 self.__doinit = False 353 self.__name = None 354 self.__parent = None 355 self.__accumNames = {} 356 self.__asList = asList 357 self.__modal = modal 358 if toklist is None: 359 toklist = [] 360 if isinstance(toklist, list): 361 self.__toklist = toklist[:] 362 elif isinstance(toklist, _generatorType): 363 self.__toklist = list(toklist) 364 else: 365 self.__toklist = [toklist] 366 self.__tokdict = dict() 367 368 if name is not None and name: 369 if not modal: 370 self.__accumNames[name] = 0 371 if 
isinstance(name,int): 372 name = _ustr(name) # will always return a str, but use _ustr for consistency 373 self.__name = name 374 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): 375 if isinstance(toklist,basestring): 376 toklist = [ toklist ] 377 if asList: 378 if isinstance(toklist,ParseResults): 379 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 380 else: 381 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 382 self[name].__name = name 383 else: 384 try: 385 self[name] = toklist[0] 386 except (KeyError,TypeError,IndexError): 387 self[name] = toklist 388 389 def __getitem__( self, i ): 390 if isinstance( i, (int,slice) ): 391 return self.__toklist[i] 392 else: 393 if i not in self.__accumNames: 394 return self.__tokdict[i][-1][0] 395 else: 396 return ParseResults([ v[0] for v in self.__tokdict[i] ]) 397 398 def __setitem__( self, k, v, isinstance=isinstance ): 399 if isinstance(v,_ParseResultsWithOffset): 400 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 401 sub = v[0] 402 elif isinstance(k,(int,slice)): 403 self.__toklist[k] = v 404 sub = v 405 else: 406 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 407 sub = v 408 if isinstance(sub,ParseResults): 409 sub.__parent = wkref(self) 410 411 def __delitem__( self, i ): 412 if isinstance(i,(int,slice)): 413 mylen = len( self.__toklist ) 414 del self.__toklist[i] 415 416 # convert int to slice 417 if isinstance(i, int): 418 if i < 0: 419 i += mylen 420 i = slice(i, i+1) 421 # get removed indices 422 removed = list(range(*i.indices(mylen))) 423 removed.reverse() 424 # fixup indices in token dictionary 425 for name,occurrences in self.__tokdict.items(): 426 for j in removed: 427 for k, (value, position) in enumerate(occurrences): 428 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 429 else: 430 del self.__tokdict[i] 431 432 def __contains__( self, k ): 433 return k in self.__tokdict 434 
def pop(self, *args, **kwargs):
    """
    Removes and returns an item, with either C{list} or C{dict} semantics.

    With no argument the last parsed token is removed; with an integer
    argument the token at that index is removed.  With a non-integer
    argument (most likely a string) the matching results name is removed
    and its value returned.  A fallback value may be given as a second
    positional argument or as C{default=...}, just as in C{dict.pop()};
    it is returned when a non-integer key is not present.  The only
    accepted keyword is C{default}; any other raises C{TypeError}.

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
    """
    lookup = args[0] if args else -1
    fallback = tuple(args[1:2])
    for keyword, value in kwargs.items():
        if keyword != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % keyword)
        fallback = (value,)

    # the fallback is used only for a non-integer key that is not present
    if fallback and not isinstance(lookup, int) and lookup not in self:
        return fallback[0]

    removed = self[lookup]
    del self[lookup]
    return removed
def insert(self, index, insStr):
    """
    Inserts new element at location index in the list of parsed tokens,
    then adjusts the offsets stored with the named results.

    Similar to C{list.insert()}.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)
    # offsets strictly greater than the insertion index move up by one
    # NOTE(review): an offset exactly equal to `index` is left unchanged by
    # the strict '>' comparison - confirm that this is intended
    for occurrences in self.__tokdict.values():
        for slot, entry in enumerate(occurrences):
            value, position = entry
            shifted = position + (position > index)
            occurrences[slot] = _ParseResultsWithOffset(value, shifted)
600 601 Example:: 602 patt = OneOrMore(Word(alphas)) 603 604 # use a parse action to append the reverse of the matched strings, to make a palindrome 605 def make_palindrome(tokens): 606 tokens.extend(reversed([t[::-1] for t in tokens])) 607 return ''.join(tokens) 608 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 609 """ 610 if isinstance(itemseq, ParseResults): 611 self += itemseq 612 else: 613 self.__toklist.extend(itemseq) 614 615 def clear( self ): 616 """ 617 Clear all elements and results names. 618 """ 619 del self.__toklist[:] 620 self.__tokdict.clear() 621 622 def __getattr__( self, name ): 623 try: 624 return self[name] 625 except KeyError: 626 return "" 627 628 if name in self.__tokdict: 629 if name not in self.__accumNames: 630 return self.__tokdict[name][-1][0] 631 else: 632 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 633 else: 634 return "" 635 636 def __add__( self, other ): 637 ret = self.copy() 638 ret += other 639 return ret 640 641 def __iadd__( self, other ): 642 if other.__tokdict: 643 offset = len(self.__toklist) 644 addoffset = lambda a: offset if a<0 else a+offset 645 otheritems = other.__tokdict.items() 646 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 647 for (k,vlist) in otheritems for v in vlist] 648 for k,v in otherdictitems: 649 self[k] = v 650 if isinstance(v[0],ParseResults): 651 v[0].__parent = wkref(self) 652 653 self.__toklist += other.__toklist 654 self.__accumNames.update( other.__accumNames ) 655 return self 656 657 def __radd__(self, other): 658 if isinstance(other,int) and other == 0: 659 # useful for merging many ParseResults using sum() builtin 660 return self.copy() 661 else: 662 # this may raise a TypeError - so be it 663 return other + self 664 665 def __repr__( self ): 666 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 667 668 def __str__( self ): 669 return '[' + ', '.join(_ustr(i) if 
def asDict(self):
    """
    Returns the named parse results as a nested dictionary.

    Values that are themselves C{ParseResults} are converted too: to a
    C{dict} when they have results names of their own, otherwise to a
    C{list} of their (recursively converted) items.  All other values are
    returned unchanged.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    def plain(value):
        # leave ordinary tokens alone; unwrap nested ParseResults
        if not isinstance(value, ParseResults):
            return value
        if value.haskeys():
            return value.asDict()
        return [plain(member) for member in value]

    # the key/value iterator has a different name on Python 2 and Python 3
    named_pairs = self.items() if PY_3 else self.iteritems()
    converted = {}
    for key, value in named_pairs:
        converted[key] = plain(value)
    return converted
def getName(self):
    """
    Returns the results name for this token expression. Useful when several
    different expressions might match at a particular location.

    The name is resolved in this order:
     - this object's own name, when it has one
     - otherwise, when a parent reference is recorded, whatever the
       parent's lookup returns for this object (C{None} when the
       reference yields no parent or a false one)
     - otherwise, when there is exactly one token and exactly one results
       name whose first recorded offset is 0 or -1, that results name
     - otherwise C{None}

    Example::
        integer = Word(nums)
        user_data = (Group(Suppress('#') + Word(nums, alphanums))("house_number")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 #221B")
        for item in result:
            print(item.getName(), ':', item[0])
    prints::
        age : 22
        house_number : 221B
    """
    own_name = self.__name
    if own_name:
        return own_name

    parent_ref = self.__parent
    if parent_ref:
        # stored as a weak reference; calling it yields the parent or None
        owner = parent_ref()
        return owner.__lookup(self) if owner else None

    if len(self) != 1 or len(self.__tokdict) != 1:
        return None

    (sole_name, occurrences), = self.__tokdict.items()
    if occurrences[0][1] in (0, -1):
        return sole_name
    return None
854 855 Example:: 856 integer = Word(nums) 857 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 858 859 result = date_str.parseString('12/31/1999') 860 print(result.dump()) 861 prints:: 862 ['12', '/', '31', '/', '1999'] 863 - day: 1999 864 - month: 31 865 - year: 12 866 """ 867 out = [] 868 NL = '\n' 869 out.append( indent+_ustr(self.asList()) ) 870 if full: 871 if self.haskeys(): 872 items = sorted((str(k), v) for k,v in self.items()) 873 for k,v in items: 874 if out: 875 out.append(NL) 876 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 877 if isinstance(v,ParseResults): 878 if v: 879 out.append( v.dump(indent,depth+1) ) 880 else: 881 out.append(_ustr(v)) 882 else: 883 out.append(repr(v)) 884 elif any(isinstance(vv,ParseResults) for vv in self): 885 v = self 886 for i,vv in enumerate(v): 887 if isinstance(vv,ParseResults): 888 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) 889 else: 890 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) 891 892 return "".join(out) 893 894 def pprint(self, *args, **kwargs): 895 """ 896 Pretty-printer for parsed results as a list, using the C{pprint} module. 897 Accepts additional positional or keyword args as defined for the 898 C{pprint.pprint} method. 
# Register ParseResults as a virtual subclass of the MutableMapping ABC.
# The ABC lives in collections.abc; the old collections.MutableMapping alias
# was removed in Python 3.10, so it is used only as the Python 2 fallback.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
955 """ 956 s = strg 957 return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc) 958 959def lineno(loc,strg): 960 """Returns current line number within a string, counting newlines as line separators. 961 The first line is number 1. 962 963 Note: the default parsing behavior is to expand tabs in the input string 964 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 965 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 966 consistent view of the parsed string, the parse location, and line and column 967 positions within the parsed string. 968 """ 969 return strg.count("\n",0,loc) + 1 970 971def line( loc, strg ): 972 """Returns the line of text containing loc within a string, counting newlines as line separators. 973 """ 974 lastCR = strg.rfind("\n", 0, loc) 975 nextCR = strg.find("\n", loc) 976 if nextCR >= 0: 977 return strg[lastCR+1:nextCR] 978 else: 979 return strg[lastCR+1:] 980 981def _defaultStartDebugAction( instring, loc, expr ): 982 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) 983 984def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): 985 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 986 987def _defaultExceptionDebugAction( instring, loc, expr, exc ): 988 print ("Exception raised:" + _ustr(exc)) 989 990def nullDebugAction(*args): 991 """'Do-nothing' debug action, to suppress debugging output during parsing.""" 992 pass 993 994# Only works on Python 3.x - nonlocal is toxic to Python 2 installs 995#~ 'decorator to trim function calls to match the arity of the target' 996#~ def _trim_arity(func, maxargs=3): 997 #~ if func in singleArgBuiltins: 998 #~ return lambda s,l,t: func(t) 999 #~ limit = 0 1000 #~ foundArity = False 1001 #~ def wrapper(*args): 1002 #~ nonlocal limit,foundArity 1003 #~ while 1: 1004 #~ try: 1005 #~ ret = 
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called with a full argument list even when it
    accepts fewer arguments.

    Callables found in C{singleArgBuiltins} are wrapped in a lambda that passes
    only the third argument.  For any other callable the returned C{wrapper}
    calls C{func(*args[limit:])}, starting with C{limit} at 0; each time the
    call itself raises C{TypeError}, one more leading argument is dropped and
    the call is retried, until C{limit} has gone past C{maxargs}.

    A C{TypeError} is re-raised unchanged when a previous call has already
    succeeded, or when the traceback shows it was not raised on the line of the
    C{func(...)} call inside C{wrapper}.

    Parameters:
     - func - the callable to adapt
     - maxargs - (default=C{2}) C{limit} is incremented while it is
       C{<= maxargs}, so up to C{maxargs + 1} leading arguments can be dropped

    Returns the lambda or the C{wrapper} function; C{wrapper.__name__} is set
    from C{func} for readable debug output.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # single-element lists so the nested wrapper can rebind the values
    # without needing 'nonlocal' (unavailable in Python 2)
    limit = [0]
    foundArity = [False]

    # reduce a stack entry to a [(filename, lineno)] list; index -2 is used
    # because the newest entry is presumably this helper's own frame
    def extract_stack(limit=0):
        offset = -2
        frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
        return [(frame_summary.filename, frame_summary.lineno)]
    # same (filename, lineno) reduction for the last of the first 'limit'
    # traceback entries
    def extract_tb(tb, limit=0):
        frames = traceback.extract_tb(tb, limit=limit)
        frame_summary = frames[-1]
        return [(frame_summary.filename, frame_summary.lineno)]

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                # a successful call fixes the arity; later TypeErrors are genuine
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # an error raised anywhere other than the func(...) call line
                        # above is not an arity mismatch, so let it propagate
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference once it has been inspected
                        del tb

                # retry with one fewer leading argument while allowed
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
def __init__(self, savelist=False):
    """
    Set up the default state carried by every parser element.

    Parameters:
     - savelist - (default=C{False}) stored as C{saveAsList}
    """
    # --- naming and packaging of results ---
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True  # used to mark results names as modal (report only last) or cumulative (list all)

    # --- user-supplied hooks ---
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = (None, None, None)  # custom debug actions

    # --- whitespace and ignorable-expression handling ---
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True  # used to avoid redundant calls to preParse

    # --- representation and error reporting ---
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""

    # --- grammar analysis and optimization flags ---
    self.mayReturnEmpty = False  # used when checking for left-recursion
    self.mayIndexError = True  # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None
def setName(self, name):
    """
    Give this expression a name, so that debugging output and exception
    messages are easier to read.  Returns C{self} to allow call chaining.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    if not hasattr(self, "exception"):
        return self
    # an exception object already attached to this element must carry
    # the refreshed message as well
    self.exception.msg = self.errmsg
    return self
def setBreak(self, breakFlag=True):
    """Arrange for the Python pdb debugger to be entered just before this
    element is parsed.  Pass C{breakFlag} as True to enable this, or as
    False to restore the parse method that was wrapped earlier.
    Returns C{self}.
    """
    if not breakFlag:
        # only unwrap when the current parse method is one of our breakers
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped_parse(instring, loc, doActions, callPreParse)

    # remember what was wrapped so that setBreak(False) can undo it
    breaker._originalParseMethod = wrapped_parse
    self._parse = breaker
    return self
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each predicate in C{fns} is registered as its own parse action, so every
    one of them is evaluated; the first predicate returning a false value
    raises the exception.  Returns C{self}.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if true, the conditions are also run during lookaheads and alternate testing

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(predicate):
        # A factory gives each parse action its own 'predicate' binding.
        # Defining 'pa' directly in the loop below would make every action
        # see only the last function in fns (closures bind late).
        def pa(s, l, t):
            if not bool(predicate(s, l, t)):
                raise exc_type(s, l, msg)
        return pa

    for fn in fns:
        # adapt the arity once at registration instead of on every parse call
        self.parseAction.append(make_condition_action(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def preParse(self, instring, loc):
    """
    Advance C{loc} past anything that should be skipped before matching:
    first the expressions in C{ignoreExprs} (when there are any), then any
    run of characters from C{whiteChars} (when C{skipWhitespace} is set).
    Returns the new location.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end:
        if instring[loc] not in skippable:
            break
        loc += 1
    return loc
def tryParse(self, instring, loc):
    """
    Attempt a match at C{loc} with parse actions switched off
    (C{doActions=False}) and return only the location following the match.
    A C{ParseFatalException} raised by the attempt is converted into a
    plain C{ParseException} located at C{loc}.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
# Size-limited cache with first-in/first-out eviction.  Two implementations
# with the same interface are provided, chosen by whether an ordered
# dictionary type is available.
if _OrderedDict is not None:
    class _FifoCache(object):
        """Cache holding at most C{size} entries; the oldest inserted key is
        evicted first.  C{get} returns C{not_in_cache} for a missing key."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                while len(cache) > size:
                    try:
                        # last=False pops the oldest entry
                        cache.popitem(False)
                    except KeyError:
                        # nothing left to evict; stop instead of spinning
                        break

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            # the accessors are closures over 'cache', bound per instance
            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Cache holding at most C{size} entries; the oldest inserted key is
        evicted first.  C{get} returns C{not_in_cache} for a missing key."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # The queue is deliberately unbounded: a deque created with
            # maxlen=size discards old keys by itself, so their values
            # would never be removed from 'cache' and it would grow
            # without limit.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # queue each key once, so that re-setting an existing key
                # keeps its original position, as in the ordered-dict variant
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while key_fifo and len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            # the accessors are closures over 'cache', bound per instance
            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing front end for C{_parseNoCache}.  Results and parse exceptions
    are stored in C{ParserElement.packrat_cache} under a key built from this
    element and the call arguments; hit and miss counts are kept in
    C{ParserElement.packrat_cache_stats}.  The whole lookup runs while
    holding C{ParserElement.packrat_cache_lock}.
    """
    HIT, MISS = 0, 1
    cache_key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(cache_key)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            # a remembered failure is replayed by raising it again
            if isinstance(cached, Exception):
                raise cached
            # hand out a copy of the tokens, keeping the cached ones intact
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(cache_key, pe.__class__(*pe.args))
            raise
        cache.set(cache_key, (result[0], result[1].copy()))
        return result
def parseString(self, instring, parseAll=False):
    """
    Run this parse expression against the given string, starting at its
    first character, and return the matched tokens.  This is the main entry
    point for client code once the complete expression has been built.

    Set C{parseAll} to True to require that the entire input string be
    consumed by the match (equivalent to ending the grammar with
    C{L{StringEnd()}}).

    Note: unless C{keepTabs} is set, C{parseString} calls C{expandtabs()} on
    the input string, in order to report proper column numbers in parse
    actions.  If the input string contains tabs and the grammar uses parse
    actions that use the C{loc} argument to index into the string being
    parsed, you can ensure you have a consistent view of the input string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # skip trailing whitespace/ignorables, then demand end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, loc)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return tokens
def transformString(self, instring):
    """
    Built on C{L{scanString}}: returns a copy of C{instring} in which every
    match of this expression has been replaced by the tokens produced for
    that match.  To use C{transformString}, define a grammar and attach a
    parse action to it that modifies the returned token list; the text
    between matches is carried over unchanged.

    Calling this method sets C{keepTabs} to True on this expression.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    resume_at = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[resume_at:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            resume_at = end
        pieces.append(instring[resume_at:])
        # drop empty fragments before flattening and joining
        kept = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(kept)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator that splits C{instring}, treating each match of this
    expression as a separator.  The optional C{maxsplit} argument limits
    the number of splits, and the optional C{includeSeparators} argument
    (default=C{False}) selects whether the first token of each separator
    match is yielded between the pieces.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    resume_at = 0
    for tokens, start, end in self.scanString(instring, maxMatches=maxsplit):
        # text lying between the previous separator and this one
        yield instring[resume_at:start]
        if includeSeparators:
            yield tokens[0]
        resume_at = end
    # whatever follows the final separator (possibly an empty string)
    yield instring[resume_at:]
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop.
    A string operand is first converted using the configured literal
    string class; any other non-C{ParserElement} operand produces a
    C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
in place of 1853 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1854 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1855 may also include C{None} as in: 1856 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1857 to C{expr*n + L{ZeroOrMore}(expr)} 1858 (read as "at least n instances of C{expr}") 1859 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1860 (read as "0 to n instances of C{expr}") 1861 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1862 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1863 1864 Note that C{expr*(None,n)} does not raise an exception if 1865 more than n exprs exist in the input stream; that is, 1866 C{expr*(None,n)} does not enforce a maximum number of expr 1867 occurrences. If this behavior is desired, then write 1868 C{expr*(None,n) + ~expr} 1869 """ 1870 if isinstance(other,int): 1871 minElements, optElements = other,0 1872 elif isinstance(other,tuple): 1873 other = (other + (None, None))[:2] 1874 if other[0] is None: 1875 other = (0, other[1]) 1876 if isinstance(other[0],int) and other[1] is None: 1877 if other[0] == 0: 1878 return ZeroOrMore(self) 1879 if other[0] == 1: 1880 return OneOrMore(self) 1881 else: 1882 return self*other[0] + ZeroOrMore(self) 1883 elif isinstance(other[0],int) and isinstance(other[1],int): 1884 minElements, optElements = other 1885 optElements -= minElements 1886 else: 1887 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1888 else: 1889 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1890 1891 if minElements < 0: 1892 raise ValueError("cannot multiply ParserElement by negative value") 1893 if optElements < 0: 1894 raise ValueError("second tuple value must be greater or equal to first tuple value") 1895 if minElements == optElements == 0: 1896 raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 1897 1898 if (optElements): 1899 def 
makeOptionalList(n): 1900 if n>1: 1901 return Optional(self + makeOptionalList(n-1)) 1902 else: 1903 return Optional(self) 1904 if minElements: 1905 if minElements == 1: 1906 ret = self + makeOptionalList(optElements) 1907 else: 1908 ret = And([self]*minElements) + makeOptionalList(optElements) 1909 else: 1910 ret = makeOptionalList(optElements) 1911 else: 1912 if minElements == 1: 1913 ret = self 1914 else: 1915 ret = And([self]*minElements) 1916 return ret 1917 1918 def __rmul__(self, other): 1919 return self.__mul__(other) 1920 1921 def __or__(self, other ): 1922 """ 1923 Implementation of | operator - returns C{L{MatchFirst}} 1924 """ 1925 if isinstance( other, basestring ): 1926 other = ParserElement._literalStringClass( other ) 1927 if not isinstance( other, ParserElement ): 1928 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1929 SyntaxWarning, stacklevel=2) 1930 return None 1931 return MatchFirst( [ self, other ] ) 1932 1933 def __ror__(self, other ): 1934 """ 1935 Implementation of | operator when left operand is not a C{L{ParserElement}} 1936 """ 1937 if isinstance( other, basestring ): 1938 other = ParserElement._literalStringClass( other ) 1939 if not isinstance( other, ParserElement ): 1940 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1941 SyntaxWarning, stacklevel=2) 1942 return None 1943 return other | self 1944 1945 def __xor__(self, other ): 1946 """ 1947 Implementation of ^ operator - returns C{L{Or}} 1948 """ 1949 if isinstance( other, basestring ): 1950 other = ParserElement._literalStringClass( other ) 1951 if not isinstance( other, ParserElement ): 1952 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1953 SyntaxWarning, stacklevel=2) 1954 return None 1955 return Or( [ self, other ] ) 1956 1957 def __rxor__(self, other ): 1958 """ 1959 Implementation of ^ operator when left operand is not a C{L{ParserElement}} 1960 """ 1961 if 
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}.

    A string operand is first wrapped using C{ParserElement._literalStringClass}.
    If the operand still is not a C{ParserElement}, a C{SyntaxWarning} is issued
    and C{None} is returned.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    # operand cannot be combined: warn the caller's frame and give back None
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
def setWhitespaceChars(self, chars):
    """
    Replace the set of whitespace characters used by this element with C{chars}.

    Whitespace skipping is switched on, and the element is flagged as no longer
    copying the default whitespace characters.  Returns C{self}.
    """
    # the three attribute writes are independent of one another
    self.copyDefaultWhiteChars = False
    self.whiteChars = chars
    self.skipWhitespace = True
    return self
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Install the three debug callbacks for this element and turn debugging on.

    Any action given as a false value (such as C{None}) is replaced by the
    corresponding module-level default debug action.  Returns C{self}.
    """
    on_start = startAction if startAction else _defaultStartDebugAction
    on_success = successAction if successAction else _defaultSuccessDebugAction
    on_error = exceptionAction if exceptionAction else _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_error)
    self.debug = True
    return self
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation simply runs C{checkRecursion} with a fresh, empty list.
    C{validateTrace} is accepted for signature compatibility and is not used
    here; its default is C{None} rather than a shared mutable list.
    """
    self.checkRecursion( [] )
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of
    (test string, parse results or raised exception) tuples, one per test run

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        # strip via the bound method of each line; mapping the unbound
        # str.strip rejects unicode lines on Python 2
        tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # comment lines, and blank lines that directly follow buffered comments,
        # are held back and printed ahead of the next real test
        if (comment is not None and comment.matches(t, False)) or (comments and not t):
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            t = t.replace(r'\n','\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show the offending line, then the caret under its column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
class NoMatch(Token):
    """
    A token that will never match: every parse attempt raises C{ParseException}.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must not be
    immediately preceded or followed by a keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case, including the identifier characters
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # In both branches the keyword text must match at loc, the character after
        # it (if any) must not be an identifier character, and neither may the
        # character before it (if any).
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps the same identifier characters.
        """
        c = super(Keyword,self).copy()
        # carry this instance's identifier set over (as an independent set) instead
        # of resetting it to the class default string, which dropped any custom
        # or upper-cased identChars
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        # only attempt a comparison if enough input remains for a full-length match
        if maxloc <= instrlen:
            match_string = self.match_string
            match_stringloc = 0
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], match_string)):
                src,mat = s_m
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # loop finished without exceeding the mismatch budget;
                # match_stringloc is relative to start, so offset it to get
                # the absolute end location in instring
                loc = start + match_stringloc + 1
                results = ParseResults([instring[start:loc]])
                results['original'] = self.match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2600 2601 pyparsing includes helper strings for building Words: 2602 - L{alphas} 2603 - L{nums} 2604 - L{alphanums} 2605 - L{hexnums} 2606 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) 2607 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) 2608 - L{printables} (any non-whitespace character) 2609 2610 Example:: 2611 # a word composed of digits 2612 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 2613 2614 # a word with a leading capital, and zero or more lowercase 2615 capital_word = Word(alphas.upper(), alphas.lower()) 2616 2617 # hostnames are alphanumeric, with leading alpha, and '-' 2618 hostname = Word(alphas, alphanums+'-') 2619 2620 # roman numeral (not a strict parser, accepts invalid mix of characters) 2621 roman = Word("IVXLCDM") 2622 2623 # any string of non-whitespace characters, except for ',' 2624 csv_value = Word(printables, excludeChars=",") 2625 """ 2626 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): 2627 super(Word,self).__init__() 2628 if excludeChars: 2629 initChars = ''.join(c for c in initChars if c not in excludeChars) 2630 if bodyChars: 2631 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 2632 self.initCharsOrig = initChars 2633 self.initChars = set(initChars) 2634 if bodyChars : 2635 self.bodyCharsOrig = bodyChars 2636 self.bodyChars = set(bodyChars) 2637 else: 2638 self.bodyCharsOrig = initChars 2639 self.bodyChars = set(initChars) 2640 2641 self.maxSpecified = max > 0 2642 2643 if min < 1: 2644 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 2645 2646 self.minLen = min 2647 2648 if max > 0: 2649 self.maxLen = max 2650 else: 2651 self.maxLen = _MAX_INT 2652 2653 if exact > 0: 2654 self.maxLen = exact 2655 self.minLen = exact 2656 2657 self.name = 
_ustr(self) 2658 self.errmsg = "Expected " + self.name 2659 self.mayIndexError = False 2660 self.asKeyword = asKeyword 2661 2662 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 2663 if self.bodyCharsOrig == self.initCharsOrig: 2664 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 2665 elif len(self.initCharsOrig) == 1: 2666 self.reString = "%s[%s]*" % \ 2667 (re.escape(self.initCharsOrig), 2668 _escapeRegexRangeChars(self.bodyCharsOrig),) 2669 else: 2670 self.reString = "[%s][%s]*" % \ 2671 (_escapeRegexRangeChars(self.initCharsOrig), 2672 _escapeRegexRangeChars(self.bodyCharsOrig),) 2673 if self.asKeyword: 2674 self.reString = r"\b"+self.reString+r"\b" 2675 try: 2676 self.re = re.compile( self.reString ) 2677 except Exception: 2678 self.re = None 2679 2680 def parseImpl( self, instring, loc, doActions=True ): 2681 if self.re: 2682 result = self.re.match(instring,loc) 2683 if not result: 2684 raise ParseException(instring, loc, self.errmsg, self) 2685 2686 loc = result.end() 2687 return loc, result.group() 2688 2689 if not(instring[ loc ] in self.initChars): 2690 raise ParseException(instring, loc, self.errmsg, self) 2691 2692 start = loc 2693 loc += 1 2694 instrlen = len(instring) 2695 bodychars = self.bodyChars 2696 maxloc = start + self.maxLen 2697 maxloc = min( maxloc, instrlen ) 2698 while loc < maxloc and instring[loc] in bodychars: 2699 loc += 1 2700 2701 throwException = False 2702 if loc - start < self.minLen: 2703 throwException = True 2704 if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 2705 throwException = True 2706 if self.asKeyword: 2707 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 2708 throwException = True 2709 2710 if throwException: 2711 raise ParseException(instring, loc, self.errmsg, self) 2712 2713 return loc, instring[start:loc] 2714 2715 def __str__( self ): 2716 try: 2717 return 
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already-compiled RE object, which is then used directly.
        Raises C{ValueError} if C{pattern} is neither a string nor a compiled RE object;
        an invalid pattern string re-raises the C{re} compile error after a C{SyntaxWarning}."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the exception class re.compile raises for a bad pattern
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the pattern's source text; str(pattern) would store the
            # "re.compile(...)" repr of the compiled object instead
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            # expose each named group as a named parse result
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        # prefer the name from the base class; fall back to a "Re:(...)" form
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2814 2815 Defined with the following parameters: 2816 - quoteChar - string of one or more characters defining the quote delimiting string 2817 - escChar - character to escape quotes, typically backslash (default=C{None}) 2818 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None}) 2819 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) 2820 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) 2821 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) 2822 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True}) 2823 2824 Example:: 2825 qs = QuotedString('"') 2826 print(qs.searchString('lsjdf "This is the quote" sldjf')) 2827 complex_qs = QuotedString('{{', endQuoteChar='}}') 2828 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) 2829 sql_qs = QuotedString('"', escQuote='""') 2830 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 2831 prints:: 2832 [['This is the quote']] 2833 [['This is the "quote"']] 2834 [['This is the quote with "embedded" quotes']] 2835 """ 2836 def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): 2837 super(QuotedString,self).__init__() 2838 2839 # remove white space from quote chars - won't work anyway 2840 quoteChar = quoteChar.strip() 2841 if not quoteChar: 2842 warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 2843 raise SyntaxError() 2844 2845 if endQuoteChar is None: 2846 endQuoteChar = quoteChar 2847 else: 2848 endQuoteChar = endQuoteChar.strip() 2849 if not endQuoteChar: 2850 warnings.warn("endQuoteChar cannot be the empty 
string",SyntaxWarning,stacklevel=2) 2851 raise SyntaxError() 2852 2853 self.quoteChar = quoteChar 2854 self.quoteCharLen = len(quoteChar) 2855 self.firstQuoteChar = quoteChar[0] 2856 self.endQuoteChar = endQuoteChar 2857 self.endQuoteCharLen = len(endQuoteChar) 2858 self.escChar = escChar 2859 self.escQuote = escQuote 2860 self.unquoteResults = unquoteResults 2861 self.convertWhitespaceEscapes = convertWhitespaceEscapes 2862 2863 if multiline: 2864 self.flags = re.MULTILINE | re.DOTALL 2865 self.pattern = r'%s(?:[^%s%s]' % \ 2866 ( re.escape(self.quoteChar), 2867 _escapeRegexRangeChars(self.endQuoteChar[0]), 2868 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 2869 else: 2870 self.flags = 0 2871 self.pattern = r'%s(?:[^%s\n\r%s]' % \ 2872 ( re.escape(self.quoteChar), 2873 _escapeRegexRangeChars(self.endQuoteChar[0]), 2874 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 2875 if len(self.endQuoteChar) > 1: 2876 self.pattern += ( 2877 '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 2878 _escapeRegexRangeChars(self.endQuoteChar[i])) 2879 for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' 2880 ) 2881 if escQuote: 2882 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 2883 if escChar: 2884 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 2885 self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 2886 self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 2887 2888 try: 2889 self.re = re.compile(self.pattern, self.flags) 2890 self.reString = self.pattern 2891 except sre_constants.error: 2892 warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 2893 SyntaxWarning, stacklevel=2) 2894 raise 2895 2896 self.name = _ustr(self) 2897 self.errmsg = "Expected " + self.name 2898 self.mayIndexError = False 2899 self.mayReturnEmpty = True 2900 2901 def parseImpl( self, instring, loc, doActions=True ): 2902 result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 2903 if 
class CharsNotIn(Token):
    """
    Token that matches a run of characters, none of which appears in a given
    exclusion set.  Whitespace is matched like any other character unless it is
    listed in the exclusion set (see example).

    Defined with a string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  C{min} defaults to 1 (values below 1
    are rejected); C{max} and C{exact} default to 0, meaning no maximum or exact
    length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # an exact length overrides both bounds; otherwise a non-positive max
        # means "no upper limit"
        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until an excluded character, the maximum
        length or the end of input is reached.  Returns C{(new_loc, text)};
        raises C{ParseException} if the first character is excluded or fewer
        than C{minLen} characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin+self.maxLen, len(instring) )
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # the base implementation failed; use the abbreviated form below
            pass

        if self.strRepr is None:
            chars = self.notChars
            # show at most the first four excluded characters
            self.strRepr = ("!W:(%s...)" % chars[:4]) if len(chars) > 4 else ("!W:(%s)" % chars)

        return self.strRepr
class GoToColumn(_PositionToken):
    """
    Token that advances to a given column of the input text; handy when scraping
    tabular reports.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column (or non-space text) is reached."""
        if col(loc,instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return C{(new_loc, skipped_text)} where C{new_loc} lies at the target
        column; raises C{ParseException} if C{loc} is already past that column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with an empty token list when C{loc} is at or beyond the end of
        C{instring}; raise C{ParseException} when input remains.

        At exactly the end of the string the returned location is C{loc+1};
        a location already beyond the end is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        # loc > end: the three comparisons are exhaustive, no further case exists
        return loc, []
3193 """ 3194 def __init__(self, wordChars = printables): 3195 super(WordStart,self).__init__() 3196 self.wordChars = set(wordChars) 3197 self.errmsg = "Not at the start of a word" 3198 3199 def parseImpl(self, instring, loc, doActions=True ): 3200 if loc != 0: 3201 if (instring[loc-1] in self.wordChars or 3202 instring[loc] not in self.wordChars): 3203 raise ParseException(instring, loc, self.errmsg, self) 3204 return loc, [] 3205 3206class WordEnd(_PositionToken): 3207 """ 3208 Matches if the current position is at the end of a Word, and 3209 is not followed by any character in a given set of C{wordChars} 3210 (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 3211 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of 3212 the string being parsed, or at the end of a line. 3213 """ 3214 def __init__(self, wordChars = printables): 3215 super(WordEnd,self).__init__() 3216 self.wordChars = set(wordChars) 3217 self.skipWhitespace = False 3218 self.errmsg = "Not at the end of a word" 3219 3220 def parseImpl(self, instring, loc, doActions=True ): 3221 instrlen = len(instring) 3222 if instrlen>0 and loc<instrlen: 3223 if (instring[loc] in self.wordChars or 3224 instring[loc-1] not in self.wordChars): 3225 raise ParseException(instring, loc, self.errmsg, self) 3226 return loc, [] 3227 3228 3229class ParseExpression(ParserElement): 3230 """ 3231 Abstract subclass of ParserElement, for combining and post-processing parsed tokens. 
3232 """ 3233 def __init__( self, exprs, savelist = False ): 3234 super(ParseExpression,self).__init__(savelist) 3235 if isinstance( exprs, _generatorType ): 3236 exprs = list(exprs) 3237 3238 if isinstance( exprs, basestring ): 3239 self.exprs = [ ParserElement._literalStringClass( exprs ) ] 3240 elif isinstance( exprs, collections.Iterable ): 3241 exprs = list(exprs) 3242 # if sequence of strings provided, wrap with Literal 3243 if all(isinstance(expr, basestring) for expr in exprs): 3244 exprs = map(ParserElement._literalStringClass, exprs) 3245 self.exprs = list(exprs) 3246 else: 3247 try: 3248 self.exprs = list( exprs ) 3249 except TypeError: 3250 self.exprs = [ exprs ] 3251 self.callPreparse = False 3252 3253 def __getitem__( self, i ): 3254 return self.exprs[i] 3255 3256 def append( self, other ): 3257 self.exprs.append( other ) 3258 self.strRepr = None 3259 return self 3260 3261 def leaveWhitespace( self ): 3262 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on 3263 all contained expressions.""" 3264 self.skipWhitespace = False 3265 self.exprs = [ e.copy() for e in self.exprs ] 3266 for e in self.exprs: 3267 e.leaveWhitespace() 3268 return self 3269 3270 def ignore( self, other ): 3271 if isinstance( other, Suppress ): 3272 if other not in self.ignoreExprs: 3273 super( ParseExpression, self).ignore( other ) 3274 for e in self.exprs: 3275 e.ignore( self.ignoreExprs[-1] ) 3276 else: 3277 super( ParseExpression, self).ignore( other ) 3278 for e in self.exprs: 3279 e.ignore( self.ignoreExprs[-1] ) 3280 return self 3281 3282 def __str__( self ): 3283 try: 3284 return super(ParseExpression,self).__str__() 3285 except Exception: 3286 pass 3287 3288 if self.strRepr is None: 3289 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) 3290 return self.strRepr 3291 3292 def streamline( self ): 3293 super(ParseExpression,self).streamline() 3294 3295 for e in self.exprs: 3296 e.streamline() 3297 3298 # 
collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d ) 3299 # but only if there are no parse actions or resultsNames on the nested And's 3300 # (likewise for Or's and MatchFirst's) 3301 if ( len(self.exprs) == 2 ): 3302 other = self.exprs[0] 3303 if ( isinstance( other, self.__class__ ) and 3304 not(other.parseAction) and 3305 other.resultsName is None and 3306 not other.debug ): 3307 self.exprs = other.exprs[:] + [ self.exprs[1] ] 3308 self.strRepr = None 3309 self.mayReturnEmpty |= other.mayReturnEmpty 3310 self.mayIndexError |= other.mayIndexError 3311 3312 other = self.exprs[-1] 3313 if ( isinstance( other, self.__class__ ) and 3314 not(other.parseAction) and 3315 other.resultsName is None and 3316 not other.debug ): 3317 self.exprs = self.exprs[:-1] + other.exprs[:] 3318 self.strRepr = None 3319 self.mayReturnEmpty |= other.mayReturnEmpty 3320 self.mayIndexError |= other.mayIndexError 3321 3322 self.errmsg = "Expected " + _ustr(self) 3323 3324 return self 3325 3326 def setResultsName( self, name, listAllMatches=False ): 3327 ret = super(ParseExpression,self).setResultsName(name,listAllMatches) 3328 return ret 3329 3330 def validate( self, validateTrace=[] ): 3331 tmp = validateTrace[:]+[self] 3332 for e in self.exprs: 3333 e.validate(tmp) 3334 self.checkRecursion( [] ) 3335 3336 def copy(self): 3337 ret = super(ParseExpression,self).copy() 3338 ret.exprs = [e.copy() for e in self.exprs] 3339 return ret 3340 3341class And(ParseExpression): 3342 """ 3343 Requires all given C{ParseExpression}s to be found in the given order. 3344 Expressions may be separated by whitespace. 3345 May be constructed using the C{'+'} operator. 3346 May also be constructed using the C{'-'} operator, which will suppress backtracking. 
3347 3348 Example:: 3349 integer = Word(nums) 3350 name_expr = OneOrMore(Word(alphas)) 3351 3352 expr = And([integer("id"),name_expr("name"),integer("age")]) 3353 # more easily written as: 3354 expr = integer("id") + name_expr("name") + integer("age") 3355 """ 3356 3357 class _ErrorStop(Empty): 3358 def __init__(self, *args, **kwargs): 3359 super(And._ErrorStop,self).__init__(*args, **kwargs) 3360 self.name = '-' 3361 self.leaveWhitespace() 3362 3363 def __init__( self, exprs, savelist = True ): 3364 super(And,self).__init__(exprs, savelist) 3365 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 3366 self.setWhitespaceChars( self.exprs[0].whiteChars ) 3367 self.skipWhitespace = self.exprs[0].skipWhitespace 3368 self.callPreparse = True 3369 3370 def parseImpl( self, instring, loc, doActions=True ): 3371 # pass False as last arg to _parse for first element, since we already 3372 # pre-parsed the string as part of our And pre-parsing 3373 loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 3374 errorStop = False 3375 for e in self.exprs[1:]: 3376 if isinstance(e, And._ErrorStop): 3377 errorStop = True 3378 continue 3379 if errorStop: 3380 try: 3381 loc, exprtokens = e._parse( instring, loc, doActions ) 3382 except ParseSyntaxException: 3383 raise 3384 except ParseBaseException as pe: 3385 pe.__traceback__ = None 3386 raise ParseSyntaxException._from_exception(pe) 3387 except IndexError: 3388 raise ParseSyntaxException(instring, len(instring), self.errmsg, self) 3389 else: 3390 loc, exprtokens = e._parse( instring, loc, doActions ) 3391 if exprtokens or exprtokens.haskeys(): 3392 resultlist += exprtokens 3393 return loc, resultlist 3394 3395 def __iadd__(self, other ): 3396 if isinstance( other, basestring ): 3397 other = ParserElement._literalStringClass( other ) 3398 return self.append( other ) #And( [ self, other ] ) 3399 3400 def checkRecursion( self, parseElementList ): 3401 subRecCheckList = parseElementList[:] + 
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    When several alternatives match, the one matching the longest string is used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all the expression is treated as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative at C{loc}, then parse for real with the candidates
        ordered from longest to shortest trial match, returning the first that
        succeeds.  If none succeeds, re-raise the failure that got furthest into
        the input, with this expression's error message.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for alt in self.exprs:
            try:
                endloc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc, furthestLoc = err, err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # remember every match so they can be retried longest first
                candidates.append((endloc, alt))

        # stable sort: equally long matches keep their declaration order
        candidates.sort(key=lambda pair: -pair[0])
        for _, alt in candidates:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc, furthestLoc = err, err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ixor__(self, other ):
        # a plain string is promoted to the current literal string class
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            joined = " ^ ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
+ Word(nums)) | Word(nums) 3511 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] 3512 """ 3513 def __init__( self, exprs, savelist = False ): 3514 super(MatchFirst,self).__init__(exprs, savelist) 3515 if self.exprs: 3516 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 3517 else: 3518 self.mayReturnEmpty = True 3519 3520 def parseImpl( self, instring, loc, doActions=True ): 3521 maxExcLoc = -1 3522 maxException = None 3523 for e in self.exprs: 3524 try: 3525 ret = e._parse( instring, loc, doActions ) 3526 return ret 3527 except ParseException as err: 3528 if err.loc > maxExcLoc: 3529 maxException = err 3530 maxExcLoc = err.loc 3531 except IndexError: 3532 if len(instring) > maxExcLoc: 3533 maxException = ParseException(instring,len(instring),e.errmsg,self) 3534 maxExcLoc = len(instring) 3535 3536 # only got here if no expression matched, raise exception for match that made it the furthest 3537 else: 3538 if maxException is not None: 3539 maxException.msg = self.errmsg 3540 raise maxException 3541 else: 3542 raise ParseException(instring, loc, "no defined alternatives to match", self) 3543 3544 def __ior__(self, other ): 3545 if isinstance( other, basestring ): 3546 other = ParserElement._literalStringClass( other ) 3547 return self.append( other ) #MatchFirst( [ self, other ] ) 3548 3549 def __str__( self ): 3550 if hasattr(self,"name"): 3551 return self.name 3552 3553 if self.strRepr is None: 3554 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" 3555 3556 return self.strRepr 3557 3558 def checkRecursion( self, parseElementList ): 3559 subRecCheckList = parseElementList[:] + [ self ] 3560 for e in self.exprs: 3561 e.checkRecursion( subRecCheckList ) 3562 3563 3564class Each(ParseExpression): 3565 """ 3566 Requires all given C{ParseExpression}s to be found, but in any order. 3567 Expressions may be separated by whitespace. 3568 May be constructed using the C{'&'} operator. 
3569 3570 Example:: 3571 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 3572 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 3573 integer = Word(nums) 3574 shape_attr = "shape:" + shape_type("shape") 3575 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 3576 color_attr = "color:" + color("color") 3577 size_attr = "size:" + integer("size") 3578 3579 # use Each (using operator '&') to accept attributes in any order 3580 # (shape and posn are required, color and size are optional) 3581 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) 3582 3583 shape_spec.runTests(''' 3584 shape: SQUARE color: BLACK posn: 100, 120 3585 shape: CIRCLE size: 50 color: BLUE posn: 50,80 3586 color:GREEN size:20 shape:TRIANGLE posn:20,40 3587 ''' 3588 ) 3589 prints:: 3590 shape: SQUARE color: BLACK posn: 100, 120 3591 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 3592 - color: BLACK 3593 - posn: ['100', ',', '120'] 3594 - x: 100 3595 - y: 120 3596 - shape: SQUARE 3597 3598 3599 shape: CIRCLE size: 50 color: BLUE posn: 50,80 3600 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 3601 - color: BLUE 3602 - posn: ['50', ',', '80'] 3603 - x: 50 3604 - y: 80 3605 - shape: CIRCLE 3606 - size: 50 3607 3608 3609 color: GREEN size: 20 shape: TRIANGLE posn: 20,40 3610 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 3611 - color: GREEN 3612 - posn: ['20', ',', '40'] 3613 - x: 20 3614 - y: 40 3615 - shape: TRIANGLE 3616 - size: 20 3617 """ 3618 def __init__( self, exprs, savelist = True ): 3619 super(Each,self).__init__(exprs, savelist) 3620 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 3621 self.skipWhitespace = True 3622 self.initExprGroups = True 3623 3624 def parseImpl( self, instring, loc, doActions=True ): 3625 if self.initExprGroups: 3626 self.opt1map = dict((id(e.expr),e) for e in self.exprs 
if isinstance(e,Optional)) 3627 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 3628 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 3629 self.optionals = opt1 + opt2 3630 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 3631 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 3632 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 3633 self.required += self.multirequired 3634 self.initExprGroups = False 3635 tmpLoc = loc 3636 tmpReqd = self.required[:] 3637 tmpOpt = self.optionals[:] 3638 matchOrder = [] 3639 3640 keepMatching = True 3641 while keepMatching: 3642 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 3643 failed = [] 3644 for e in tmpExprs: 3645 try: 3646 tmpLoc = e.tryParse( instring, tmpLoc ) 3647 except ParseException: 3648 failed.append(e) 3649 else: 3650 matchOrder.append(self.opt1map.get(id(e),e)) 3651 if e in tmpReqd: 3652 tmpReqd.remove(e) 3653 elif e in tmpOpt: 3654 tmpOpt.remove(e) 3655 if len(failed) == len(tmpExprs): 3656 keepMatching = False 3657 3658 if tmpReqd: 3659 missing = ", ".join(_ustr(e) for e in tmpReqd) 3660 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 3661 3662 # add any unmatched Optionals, in case they have default values defined 3663 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 3664 3665 resultlist = [] 3666 for e in matchOrder: 3667 loc,results = e._parse(instring,loc,doActions) 3668 resultlist.append(results) 3669 3670 finalResults = sum(resultlist, ParseResults([])) 3671 return loc, finalResults 3672 3673 def __str__( self ): 3674 if hasattr(self,"name"): 3675 return self.name 3676 3677 if self.strRepr is None: 3678 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" 3679 3680 return self.strRepr 3681 3682 def checkRecursion( self, parseElementList 
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}.  The contained
    expression may be C{None}; every method below checks for that before
    delegating to it.
    """
    def __init__( self, expr, savelist=False ):
        """
        Parameters:
         - expr - the expression to wrap; a plain string is promoted to a literal
           expression using C{ParserElement._literalStringClass}; may be C{None}
         - savelist - (default=C{False}) passed through to the C{ParserElement} initializer
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the parsing characteristics of the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Delegate parsing to the wrapped expression.

        Raises C{ParseException} if no expression has been attached.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element and on a private copy of the
        wrapped expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        if self.expr is not None:
            # copy first, so the change is applied to our own copy rather than
            # to the expression object that was passed in; doing the copy inside
            # the guard keeps this method safe when no expression is attached
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and forward
        the newly registered entry to the wrapped expression.  A C{Suppress}
        that is already registered is not added again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # forward the entry that was just appended to our own list
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element, then the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped
        expression with this element appended to the trail.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """
        Validate the wrapped expression, then run a recursion check starting
        from this element.
        """
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        # prefer the inherited representation; fall back to a cached
        # "ClassName:(expr)" form if the inherited one raises
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
class _MultipleMatch(ParseElementEnhance):
    """
    Common implementation of repetition: matches the wrapped expression once,
    then keeps matching it until it fails or the optional C{stopOn} sentinel
    is seen.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel, or None when no sentinel was given
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        # bind the callables used inside the loop to local names
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None if self.not_ender is None else self.not_ender.tryParse

        # the first match is mandatory; a failure here (including sitting on
        # the stopOn sentinel) propagates to the caller
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)

        # further matches are optional; the first failure ends the repetition
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, matched = parse_expr(instring, start, doActions)
                if matched or matched.haskeys():
                    tokens += matched
        except (ParseException, IndexError):
            pass

        return loc, tokens
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that must match zero or one times
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: stay at loc and substitute the default, if one was given
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the wrapped expression's results name
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        # build the bracketed form once and cache it
        if self.strRepr is None:
            self.strRepr = "".join(("[", _ustr(self.expr), "]"))

        return self.strRepr
4007 - ignore - (default=C{None}) used to define grammars (typically quoted strings and 4008 comments) that might contain false matches to the target expression 4009 - failOn - (default=C{None}) define expressions that are not allowed to be 4010 included in the skipped test; if found before the target expression is found, 4011 the SkipTo is not a match 4012 4013 Example:: 4014 report = ''' 4015 Outstanding Issues Report - 1 Jan 2000 4016 4017 # | Severity | Description | Days Open 4018 -----+----------+-------------------------------------------+----------- 4019 101 | Critical | Intermittent system crash | 6 4020 94 | Cosmetic | Spelling error on Login ('log|n') | 14 4021 79 | Minor | System slow when running too many reports | 47 4022 ''' 4023 integer = Word(nums) 4024 SEP = Suppress('|') 4025 # use SkipTo to simply match everything up until the next SEP 4026 # - ignore quoted strings, so that a '|' character inside a quoted string does not match 4027 # - parse action will call token.strip() for each matched token, i.e., the description body 4028 string_data = SkipTo(SEP, ignore=quotedString) 4029 string_data.setParseAction(tokenMap(str.strip)) 4030 ticket_expr = (integer("issue_num") + SEP 4031 + string_data("sev") + SEP 4032 + string_data("desc") + SEP 4033 + integer("days_open")) 4034 4035 for tkt in ticket_expr.searchString(report): 4036 print tkt.dump() 4037 prints:: 4038 ['101', 'Critical', 'Intermittent system crash', '6'] 4039 - days_open: 6 4040 - desc: Intermittent system crash 4041 - issue_num: 101 4042 - sev: Critical 4043 ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] 4044 - days_open: 14 4045 - desc: Spelling error on Login ('log|n') 4046 - issue_num: 94 4047 - sev: Cosmetic 4048 ['79', 'Minor', 'System slow when running too many reports', '47'] 4049 - days_open: 47 4050 - desc: System slow when running too many reports 4051 - issue_num: 79 4052 - sev: Minor 4053 """ 4054 def __init__( self, other, include=False, ignore=None, 
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        """
        Parameters:
         - other - (default=C{None}) the expression to wrap, if already known
        """
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Attach C{other} as the wrapped expression (a plain string is promoted
        using C{ParserElement._literalStringClass}) and copy its parsing
        characteristics onto this element.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of '<<'; same effect, returns C{self}."""
        return self << other

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element only; the wrapped
        expression is left untouched.  Returns C{self}.
        """
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """
        Streamline the wrapped expression once.  The C{streamlined} flag is set
        before descending, so a grammar that refers back to this element does
        not recurse endlessly.  Returns C{self}.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the wrapped expression unless this element is already on the
        validation trail, then run a recursion check starting from this element.
        """
        # the default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name
        # The wrapped expression is deliberately not rendered.  An earlier
        # implementation that did so was stubbed out ("creates awful memory and
        # perf issues") and only survived as unreachable code after this
        # return; that dead code has been removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  When no expression has been attached
        yet, return a new C{Forward} that wraps this one instead, so that a
        later assignment to this element is seen through the copy.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for index, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue

            # the first token of each entry is its key; integer keys are stored as text
            key = tok[0]
            if isinstance(key, int):
                key = _ustr(key).strip()

            if size == 1:
                # key with nothing after it
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                # key followed by one plain token
                value = tok[1]
            else:
                # everything after the key; unwrap it only when it is a single
                # token that carries no named results
                rest = tok.copy()
                del rest[0]
                keep_group = len(rest) != 1 or (isinstance(rest, ParseResults) and rest.haskeys())
                value = rest if keep_group else rest[0]

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        return [tokenlist] if self.resultsName else tokenlist
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    Each call of the decorated parse action is reported on C{sys.stderr}: on entry
    C{">>entering I{method-name}(line: I{current_source_line}, I{parse_location}, I{matched_tokens})"},
    and on exit C{"<<leaving"} followed by the returned value, or by the exception
    that the parse action raised (the exception is then re-raised).

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        source, position, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the owning object; prefix its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        entry_fields = (label, line(position, source), position, toks)
        sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % entry_fields)
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %r)\n" % (label, outcome))
        return outcome

    # let the wrapper carry the wrapped function's name, when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
4451 4452 Example:: 4453 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] 4454 4455 # in this parser, the leading integer value is given in binary, 4456 # '10' indicating that 2 values are in the array 4457 binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) 4458 countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] 4459 """ 4460 arrayExpr = Forward() 4461 def countFieldParseAction(s,l,t): 4462 n = t[0] 4463 arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 4464 return [] 4465 if intExpr is None: 4466 intExpr = Word(nums).setParseAction(lambda t:int(t[0])) 4467 else: 4468 intExpr = intExpr.copy() 4469 intExpr.setName("arrayLen") 4470 intExpr.addParseAction(countFieldParseAction, callDuringTry=True) 4471 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') 4472 4473def _flatten(L): 4474 ret = [] 4475 for i in L: 4476 if isinstance(i,list): 4477 ret.extend(_flatten(i)) 4478 else: 4479 ret.append(i) 4480 return ret 4481 4482def matchPreviousLiteral(expr): 4483 """ 4484 Helper to define an expression that is indirectly defined from 4485 the tokens matched in a previous expression, that is, it looks 4486 for a 'repeat' of a previous expression. For example:: 4487 first = Word(nums) 4488 second = matchPreviousLiteral(first) 4489 matchExpr = first + ":" + second 4490 will match C{"1:1"}, but not C{"1:2"}. Because this matches a 4491 previous literal, will also match the leading C{"1:1"} in C{"1:10"}. 4492 If this is not desired, use C{matchPreviousExpr}. 4493 Do I{not} use with packrat parsing enabled. 
4494 """ 4495 rep = Forward() 4496 def copyTokenToRepeater(s,l,t): 4497 if t: 4498 if len(t) == 1: 4499 rep << t[0] 4500 else: 4501 # flatten t tokens 4502 tflat = _flatten(t.asList()) 4503 rep << And(Literal(tt) for tt in tflat) 4504 else: 4505 rep << Empty() 4506 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 4507 rep.setName('(prev) ' + _ustr(expr)) 4508 return rep 4509 4510def matchPreviousExpr(expr): 4511 """ 4512 Helper to define an expression that is indirectly defined from 4513 the tokens matched in a previous expression, that is, it looks 4514 for a 'repeat' of a previous expression. For example:: 4515 first = Word(nums) 4516 second = matchPreviousExpr(first) 4517 matchExpr = first + ":" + second 4518 will match C{"1:1"}, but not C{"1:2"}. Because this matches by 4519 expressions, will I{not} match the leading C{"1:1"} in C{"1:10"}; 4520 the expressions are evaluated first, and then compared, so 4521 C{"1"} is compared with C{"10"}. 4522 Do I{not} use with packrat parsing enabled. 
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns a C{NoMatch} expression if no literals are given.  If C{strs} is
    neither a string nor an iterable, a C{SyntaxWarning} is issued and a
    C{NoMatch} expression is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # the Iterable ABC lives in collections.abc; the alias in the top-level
    # collections module was removed in Python 3.10, and collections.abc
    # does not exist on Python 2
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # remove duplicates, and move any symbol that has an earlier symbol as its
    # prefix in front of that earlier symbol, so the longer alternative is
    # tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position, advance to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # deliberate best-effort: report the problem and fall through to MatchFirst
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4627 4628 Example:: 4629 text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 4630 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 4631 print(OneOrMore(attr_expr).parseString(text).dump()) 4632 4633 attr_label = label 4634 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) 4635 4636 # similar to Dict, but simpler call format 4637 result = dictOf(attr_label, attr_value).parseString(text) 4638 print(result.dump()) 4639 print(result['shape']) 4640 print(result.shape) # object attribute access works too 4641 print(result.asDict()) 4642 prints:: 4643 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 4644 - color: light blue 4645 - posn: upper left 4646 - shape: SQUARE 4647 - texture: burlap 4648 SQUARE 4649 SQUARE 4650 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} 4651 """ 4652 return Dict( ZeroOrMore( Group ( key + value ) ) ) 4653 4654def originalTextFor(expr, asString=True): 4655 """ 4656 Helper to return the original, untokenized text for a given expression. Useful to 4657 restore the parsed fields of an HTML start tag into the raw tag text itself, or to 4658 revert separate tokens with intervening whitespace back to the original matching 4659 input text. By default, returns astring containing the original parsed text. 4660 4661 If the optional C{asString} argument is passed as C{False}, then the return value is a 4662 C{L{ParseResults}} containing any results names that were originally matched, and a 4663 single token containing the original matched text from the input string. So if 4664 the expression passed to C{L{originalTextFor}} contains expressions with defined 4665 results names, you must set C{asString} to C{False} if you want to preserve those 4666 results name values. 
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions.  The given
    expression is wrapped in a C{TokenConverter} whose parse action returns only
    the first token of the matched results.
    """
    def take_first(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.setParseAction(take_first)
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  If the input cannot be parsed (or any
    other exception occurs while expanding it), an empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def expand(part):
        # single characters are plain strings and pass through untouched
        if not isinstance(part, ParseResults):
            return part
        # a grouped 'x-y' range: emit every character from x through y inclusive
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(code) for code in range(first, last + 1))

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(expand(part) for part in body)
    except Exception:
        return ""
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.
    If any additional args are passed, they are forwarded to the given function as additional
    arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes the usual C{(s, l, t)} arguments and returns a new list; its
    C{__name__} is set to the name of C{func} (or of its class, if C{func} has no C{__name__}).

    Example (compare the last example to the one in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    # label the parse action after the mapped function, falling back to
    # the function's class name, and finally to its string form
    try:
        class_name = func.__class__.__name__
        label = getattr(func, '__name__', class_name)
    except Exception:
        label = str(func)
    pa.__name__ = label

    return pa
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} may be a string (converted to a C{Keyword}, caseless unless C{xml} is true)
    or an existing expression, whose C{name} attribute is then used for the results names.
    Returns an C{(openTag, closeTag)} pair; both carry a C{tag} attribute holding the name.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: attribute values must be double-quoted, every attribute needs a value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: values may be quoted either way or be a bare word (anything but '>'),
        # may be omitted entirely, and attribute names are folded to lower case
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # optional trailing '/' marks an empty tag; reported as a boolean under 'empty'
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attribute)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results-name suffix: "ns:tag" -> "NsTag"
    suffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + suffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + suffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If positional name-value tuples are given, any keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    The returned parse action raises C{ParseException} when a required attribute is
    missing from the tokens or has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional (name, value) tuples take precedence over keyword arguments
    pairs = args[:] if args else attrDict.items()
    required = [(name, value) for name, value in pairs]

    def pa(s,l,tokens):
        for name, expected in required:
            if name not in tokens:
                raise ParseException(s,l,"no matching attribute " + name)
            if expected != withAttribute.ANY_VALUE:
                actual = tokens[name]
                if actual != expected:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (name, actual, expected))
    return pa
# sentinel meaning "attribute must be present, any value is acceptable"
withAttribute.ANY_VALUE = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic element (operand) of the
       nested expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as a pair of expressions,
    if numTerms is not 1, 2, or 3, or if the associativity is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the parse action member may be omitted; tuple() lets
        # an operator definition be given as a list as well as a tuple
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate the operator pair BEFORE formatting the term name, so a bad
            # opExpr produces this ValueError rather than a string-formatting TypeError
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are combined by juxtaposition
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this precedence level matches either the operator form or falls through
        # to the next-tighter level; it then becomes the operand for the next level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5144 5145 Use the C{ignoreExpr} argument to define expressions that may contain 5146 opening or closing characters that should not be treated as opening 5147 or closing characters for nesting, such as quotedString or a comment 5148 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. 5149 The default is L{quotedString}, but if no expressions are to be ignored, 5150 then pass C{None} for this argument. 5151 5152 Example:: 5153 data_type = oneOf("void int short long char float double") 5154 decl_data_type = Combine(data_type + Optional(Word('*'))) 5155 ident = Word(alphas+'_', alphanums+'_') 5156 number = pyparsing_common.number 5157 arg = Group(decl_data_type + ident) 5158 LPAR,RPAR = map(Suppress, "()") 5159 5160 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) 5161 5162 c_function = (decl_data_type("type") 5163 + ident("name") 5164 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR 5165 + code_body("body")) 5166 c_function.ignore(cStyleComment) 5167 5168 source_code = ''' 5169 int is_odd(int x) { 5170 return (x%2); 5171 } 5172 5173 int dec_to_hex(char hchar) { 5174 if (hchar >= '0' && hchar <= '9') { 5175 return (ord(hchar)-ord('0')); 5176 } else { 5177 return (10+ord(hchar)-ord('A')); 5178 } 5179 } 5180 ''' 5181 for func in c_function.searchString(source_code): 5182 print("%(name)s (%(type)s) args: %(args)s" % func) 5183 5184 prints:: 5185 is_odd (int) args: [['int', 'x']] 5186 dec_to_hex (int) args: [['char', 'hchar']] 5187 """ 5188 if opener == closer: 5189 raise ValueError("opening and closing strings cannot be the same") 5190 if content is None: 5191 if isinstance(opener,basestring) and isinstance(closer,basestring): 5192 if len(opener) == 1 and len(closer)==1: 5193 if ignoreExpr is not None: 5194 content = (Combine(OneOrMore(~ignoreExpr + 5195 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 5196 ).setParseAction(lambda t:t[0].strip())) 5197 else: 5198 content = 
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified in place: backslash-newline
    line continuations are added to its ignore expressions.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # statement must start at exactly the current indentation column
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if column != expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # block must start deeper than the current level; push the new level
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( column )

    def checkUnindent(s,l,t):
        # block ends when the column drops back to an enclosing level; pop that level
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more peer-aligned statements, each optionally followed by blank lines
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
(to end of line)}" 5354 5355cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") 5356"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" 5357 5358javaStyleComment = cppStyleComment 5359"Same as C{L{cppStyleComment}}" 5360 5361pythonStyleComment = Regex(r"#.*").setName("Python style comment") 5362"Comment of the form C{# ... (to end of line)}" 5363 5364_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + 5365 Optional( Word(" \t") + 5366 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") 5367commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") 5368"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. 5369 This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" 5370 5371# some other useful expressions - using lower-case class name since we are really using this as a namespace 5372class pyparsing_common: 5373 """ 5374 Here are some common low-level expressions that may be useful in jump-starting parser development: 5375 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>}) 5376 - common L{programming identifiers<identifier>} 5377 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>}) 5378 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>} 5379 - L{UUID<uuid>} 5380 - L{comma-separated list<comma_separated_list>} 5381 Parse actions: 5382 - C{L{convertToInteger}} 5383 - C{L{convertToFloat}} 5384 - C{L{convertToDate}} 5385 - C{L{convertToDatetime}} 5386 - C{L{stripHTMLTags}} 5387 - C{L{upcaseTokens}} 5388 - C{L{downcaseTokens}} 5389 5390 Example:: 5391 pyparsing_common.number.runTests(''' 5392 # any int or real number, returned as the appropriate type 5393 100 5394 -100 5395 +100 5396 3.14159 5397 6.02e23 5398 1e-12 5399 ''') 
5400 5401 pyparsing_common.fnumber.runTests(''' 5402 # any int or real number, returned as float 5403 100 5404 -100 5405 +100 5406 3.14159 5407 6.02e23 5408 1e-12 5409 ''') 5410 5411 pyparsing_common.hex_integer.runTests(''' 5412 # hex numbers 5413 100 5414 FF 5415 ''') 5416 5417 pyparsing_common.fraction.runTests(''' 5418 # fractions 5419 1/2 5420 -3/4 5421 ''') 5422 5423 pyparsing_common.mixed_integer.runTests(''' 5424 # mixed fractions 5425 1 5426 1/2 5427 -3/4 5428 1-3/4 5429 ''') 5430 5431 import uuid 5432 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5433 pyparsing_common.uuid.runTests(''' 5434 # uuid 5435 12345678-1234-5678-1234-567812345678 5436 ''') 5437 prints:: 5438 # any int or real number, returned as the appropriate type 5439 100 5440 [100] 5441 5442 -100 5443 [-100] 5444 5445 +100 5446 [100] 5447 5448 3.14159 5449 [3.14159] 5450 5451 6.02e23 5452 [6.02e+23] 5453 5454 1e-12 5455 [1e-12] 5456 5457 # any int or real number, returned as float 5458 100 5459 [100.0] 5460 5461 -100 5462 [-100.0] 5463 5464 +100 5465 [100.0] 5466 5467 3.14159 5468 [3.14159] 5469 5470 6.02e23 5471 [6.02e+23] 5472 5473 1e-12 5474 [1e-12] 5475 5476 # hex numbers 5477 100 5478 [256] 5479 5480 FF 5481 [255] 5482 5483 # fractions 5484 1/2 5485 [0.5] 5486 5487 -3/4 5488 [-0.75] 5489 5490 # mixed fractions 5491 1 5492 [1] 5493 5494 1/2 5495 [0.5] 5496 5497 -3/4 5498 [-0.75] 5499 5500 1-3/4 5501 [1.75] 5502 5503 # uuid 5504 12345678-1234-5678-1234-567812345678 5505 [UUID('12345678-1234-5678-1234-567812345678')] 5506 """ 5507 5508 convertToInteger = tokenMap(int) 5509 """ 5510 Parse action for converting parsed integers to Python int 5511 """ 5512 5513 convertToFloat = tokenMap(float) 5514 """ 5515 Parse action for converting parsed numbers to Python float 5516 """ 5517 5518 integer = Word(nums).setName("integer").setParseAction(convertToInteger) 5519 """expression that parses an unsigned integer, returns an int""" 5520 5521 hex_integer = Word(hexnums).setName("hex 
integer").setParseAction(tokenMap(int,16)) 5522 """expression that parses a hexadecimal integer, returns an int""" 5523 5524 signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 5525 """expression that parses an integer with optional leading sign, returns an int""" 5526 5527 fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") 5528 """fractional expression of an integer divided by an integer, returns a float""" 5529 fraction.addParseAction(lambda t: t[0]/t[-1]) 5530 5531 mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 5532 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 5533 mixed_integer.addParseAction(sum) 5534 5535 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) 5536 """expression that parses a floating point number and returns a float""" 5537 5538 sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) 5539 """expression that parses a floating point number with optional scientific notation and returns a float""" 5540 5541 # streamlining this expression makes the docs nicer-looking 5542 number = (sci_real | real | signed_integer).streamline() 5543 """any numeric expression, returns the corresponding Python type""" 5544 5545 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 5546 """any int or real number, returned as float""" 5547 5548 identifier = Word(alphas+'_', alphanums+'_').setName("identifier") 5549 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 5550 5551 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 
address") 5552 "IPv4 address (C{0.0.0.0 - 255.255.255.255})" 5553 5554 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 5555 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") 5556 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") 5557 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 5558 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 5559 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 5560 "IPv6 address (long, short, or mixed form)" 5561 5562 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 5563 "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 5564 5565 @staticmethod 5566 def convertToDate(fmt="%Y-%m-%d"): 5567 """ 5568 Helper to create a parse action for converting parsed date string to Python datetime.date 5569 5570 Params - 5571 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) 5572 5573 Example:: 5574 date_expr = pyparsing_common.iso8601_date.copy() 5575 date_expr.setParseAction(pyparsing_common.convertToDate()) 5576 print(date_expr.parseString("1999-12-31")) 5577 prints:: 5578 [datetime.date(1999, 12, 31)] 5579 """ 5580 def cvt_fn(s,l,t): 5581 try: 5582 return datetime.strptime(t[0], fmt).date() 5583 except ValueError as ve: 5584 raise ParseException(s, l, str(ve)) 5585 return cvt_fn 5586 5587 @staticmethod 5588 def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): 5589 """ 5590 Helper to create a parse action for converting parsed datetime string to Python datetime.datetime 5591 5592 Params - 5593 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) 5594 5595 Example:: 5596 dt_expr = 
pyparsing_common.iso8601_datetime.copy() 5597 dt_expr.setParseAction(pyparsing_common.convertToDatetime()) 5598 print(dt_expr.parseString("1999-12-31T23:59:59.999")) 5599 prints:: 5600 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] 5601 """ 5602 def cvt_fn(s,l,t): 5603 try: 5604 return datetime.strptime(t[0], fmt) 5605 except ValueError as ve: 5606 raise ParseException(s, l, str(ve)) 5607 return cvt_fn 5608 5609 iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date") 5610 "ISO8601 date (C{yyyy-mm-dd})" 5611 5612 iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") 5613 "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" 5614 5615 uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") 5616 "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" 5617 5618 _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() 5619 @staticmethod 5620 def stripHTMLTags(s, l, tokens): 5621 """ 5622 Parse action to remove HTML tags from web page HTML source 5623 5624 Example:: 5625 # strip HTML links from normal text 5626 text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>' 5627 td,td_end = makeHTMLTags("TD") 5628 table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end 5629 5630 print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' 5631 """ 5632 return pyparsing_common._html_stripper.transformString(tokens[0]) 5633 5634 _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') 5635 + Optional( White(" \t") ) ) ).streamline().setName("commaItem") 5636 comma_separated_list = delimitedList( Optional( 
quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") 5637 """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" 5638 5639 upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) 5640 """Parse action to convert tokens to upper case.""" 5641 5642 downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) 5643 """Parse action to convert tokens to lower case.""" 5644 5645 5646if __name__ == "__main__": 5647 5648 selectToken = CaselessLiteral("select") 5649 fromToken = CaselessLiteral("from") 5650 5651 ident = Word(alphas, alphanums + "_$") 5652 5653 columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 5654 columnNameList = Group(delimitedList(columnName)).setName("columns") 5655 columnSpec = ('*' | columnNameList) 5656 5657 tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 5658 tableNameList = Group(delimitedList(tableName)).setName("tables") 5659 5660 simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") 5661 5662 # demo runTests method, including embedded comments in test string 5663 simpleSQL.runTests(""" 5664 # '*' as column list and dotted table name 5665 select * from SYS.XYZZY 5666 5667 # caseless match on "SELECT", and casts back to "select" 5668 SELECT * from XYZZY, ABC 5669 5670 # list of column names, and mixed case SELECT keyword 5671 Select AA,BB,CC from Sys.dual 5672 5673 # multiple tables 5674 Select A, B, C from Sys.dual, Table2 5675 5676 # invalid SELECT keyword - should fail 5677 Xelect A, B, C from Sys.dual 5678 5679 # incomplete command - should fail 5680 Select 5681 5682 # invalid column name - should fail 5683 Select ^^^ frox Sys.dual 5684 5685 """) 5686 5687 pyparsing_common.number.runTests(""" 5688 100 5689 -100 5690 +100 5691 3.14159 5692 6.02e23 5693 1e-12 5694 """) 5695 5696 # any int or real number, returned as float 5697 
pyparsing_common.fnumber.runTests(""" 5698 100 5699 -100 5700 +100 5701 3.14159 5702 6.02e23 5703 1e-12 5704 """) 5705 5706 pyparsing_common.hex_integer.runTests(""" 5707 100 5708 FF 5709 """) 5710 5711 import uuid 5712 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5713 pyparsing_common.uuid.runTests(""" 5714 12345678-1234-5678-1234-567812345678 5715 """) 5716